summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS8
-rw-r--r--accel/kvm/kvm-accel-ops.c12
-rw-r--r--accel/meson.build12
-rw-r--r--accel/qtest/qtest.c1
-rw-r--r--accel/stubs/hax-stub.c2
-rw-r--r--accel/stubs/kvm-stub.c5
-rw-r--r--accel/stubs/meson.build11
-rw-r--r--accel/tcg/cpu-exec.c4
-rw-r--r--accel/tcg/tcg-accel-ops-icount.c1
-rw-r--r--accel/tcg/tcg-accel-ops-mttcg.c1
-rw-r--r--accel/tcg/tcg-accel-ops-rr.c1
-rw-r--r--accel/tcg/tcg-accel-ops.c1
-rw-r--r--audio/alsaaudio.c1
-rw-r--r--audio/audio.c194
-rw-r--r--audio/audio_int.h20
-rw-r--r--audio/coreaudio.c15
-rw-r--r--audio/dsoundaudio.c30
-rw-r--r--audio/jackaudio.c5
-rw-r--r--audio/noaudio.c1
-rw-r--r--audio/ossaudio.c17
-rw-r--r--audio/paaudio.c49
-rw-r--r--audio/sdlaudio.c21
-rw-r--r--audio/wavaudio.c1
-rw-r--r--block.c321
-rw-r--r--block/amend.c28
-rw-r--r--block/backup.c1
-rw-r--r--block/blkverify.c1
-rw-r--r--block/block-backend.c166
-rw-r--r--block/block-copy.c151
-rw-r--r--block/commit.c5
-rw-r--r--block/copy-before-write.c267
-rw-r--r--block/copy-before-write.h7
-rw-r--r--block/coroutines.h81
-rw-r--r--block/create.c2
-rw-r--r--block/crypto.c69
-rw-r--r--block/curl.c92
-rw-r--r--block/dirty-bitmap.c20
-rw-r--r--block/dmg.c1
-rw-r--r--block/export/export.c2
-rw-r--r--block/export/fuse.c26
-rw-r--r--block/file-posix.c1
-rw-r--r--block/io.c152
-rw-r--r--block/meson.build9
-rw-r--r--block/mirror.c5
-rw-r--r--block/monitor/bitmap-qmp-cmds.c11
-rw-r--r--block/nbd.c1
-rw-r--r--block/nvme.c1
-rw-r--r--block/parallels-ext.c1
-rw-r--r--block/parallels.c3
-rw-r--r--block/preallocate.c15
-rw-r--r--block/qcow.c1
-rw-r--r--block/qcow2-cache.c1
-rw-r--r--block/qcow2-cluster.c1
-rw-r--r--block/qcow2-refcount.c1
-rw-r--r--block/qcow2-snapshot.c1
-rw-r--r--block/qcow2.c1
-rw-r--r--block/qed-l2-cache.c1
-rw-r--r--block/qed-table.c1
-rw-r--r--block/qed.c1
-rw-r--r--block/quorum.c1
-rw-r--r--block/raw-format.c1
-rw-r--r--block/reqlist.c85
-rw-r--r--block/snapshot-access.c132
-rw-r--r--block/snapshot.c28
-rw-r--r--block/stream.c2
-rw-r--r--block/vdi.c1
-rw-r--r--block/vhdx-log.c1
-rw-r--r--block/vhdx.c1
-rw-r--r--block/vmdk.c1
-rw-r--r--block/vpc.c1
-rw-r--r--block/win32-aio.c1
-rw-r--r--blockdev.c29
-rw-r--r--blockjob.c16
-rwxr-xr-xconfigure4
-rw-r--r--cpu.c8
-rw-r--r--docs/specs/index.rst1
-rw-r--r--docs/specs/sev-guest-firmware.rst125
-rw-r--r--docs/system/riscv/virt.rst16
-rw-r--r--docs/tools/qemu-storage-daemon.rst7
-rw-r--r--fsdev/file-op-9p.h9
-rw-r--r--fsdev/meson.build1
-rw-r--r--fsdev/p9array.h38
-rw-r--r--hw/9pfs/9p-local.c27
-rw-r--r--hw/9pfs/9p-proxy.c38
-rw-r--r--hw/9pfs/9p-synth.c6
-rw-r--r--hw/9pfs/9p-util-darwin.c97
-rw-r--r--hw/9pfs/9p-util-linux.c (renamed from hw/9pfs/9p-util.c)8
-rw-r--r--hw/9pfs/9p-util.h78
-rw-r--r--hw/9pfs/9p.c104
-rw-r--r--hw/9pfs/9p.h30
-rw-r--r--hw/9pfs/codir.c34
-rw-r--r--hw/9pfs/coth.h4
-rw-r--r--hw/9pfs/meson.build3
-rw-r--r--hw/arm/virt.c7
-rw-r--r--hw/block/dataplane/xen-block.c1
-rw-r--r--hw/block/fdc.c1
-rw-r--r--hw/block/pflash_cfi01.c2
-rw-r--r--hw/display/edid-generate.c66
-rw-r--r--hw/display/trace-events3
-rw-r--r--hw/display/vmware_vga.c30
-rw-r--r--hw/ide/core.c8
-rw-r--r--hw/intc/Kconfig3
-rw-r--r--hw/intc/arm_gicv3.c8
-rw-r--r--hw/intc/arm_gicv3_cpuif.c3
-rw-r--r--hw/intc/arm_gicv3_dist.c4
-rw-r--r--hw/intc/arm_gicv3_its.c69
-rw-r--r--hw/intc/meson.build1
-rw-r--r--hw/intc/riscv_imsic.c448
-rw-r--r--hw/intc/trace-events21
-rw-r--r--hw/nvme/ctrl.c235
-rw-r--r--hw/nvme/dif.c300
-rw-r--r--hw/nvme/dif.h191
-rw-r--r--hw/nvme/ns.c50
-rw-r--r--hw/nvme/nvme.h58
-rw-r--r--hw/nvme/trace-events12
-rw-r--r--hw/nvram/spapr_nvram.c2
-rw-r--r--hw/ppc/spapr.c1
-rw-r--r--hw/ppc/spapr_softmmu.c1
-rw-r--r--hw/riscv/Kconfig2
-rw-r--r--hw/riscv/opentitan.c12
-rw-r--r--hw/riscv/virt.c698
-rw-r--r--hw/scsi/scsi-disk.c1
-rw-r--r--hw/tpm/tpm_ppi.c2
-rw-r--r--hw/usb/dev-mtp.c4
-rw-r--r--hw/usb/hcd-ohci.c297
-rw-r--r--hw/usb/hcd-xhci.c2
-rw-r--r--hw/usb/redirect.c17
-rw-r--r--hw/usb/trace-events2
-rw-r--r--include/block/block-common.h419
-rw-r--r--include/block/block-copy.h2
-rw-r--r--include/block/block-global-state.h253
-rw-r--r--include/block/block-io.h368
-rw-r--r--include/block/block.h878
-rw-r--r--include/block/block_int-common.h1246
-rw-r--r--include/block/block_int-global-state.h329
-rw-r--r--include/block/block_int-io.h194
-rw-r--r--include/block/block_int.h1475
-rw-r--r--include/block/blockjob.h29
-rw-r--r--include/block/blockjob_int.h28
-rw-r--r--include/block/dirty-bitmap.h4
-rw-r--r--include/block/nvme.h81
-rw-r--r--include/block/reqlist.h75
-rw-r--r--include/block/snapshot.h13
-rw-r--r--include/exec/cpu-all.h4
-rw-r--r--include/exec/cpu-common.h39
-rw-r--r--include/exec/cpu_ldst.h1
-rw-r--r--include/exec/exec-all.h26
-rw-r--r--include/exec/gdbstub.h25
-rw-r--r--include/exec/poison.h2
-rw-r--r--include/hw/arm/virt.h1
-rw-r--r--include/hw/core/cpu.h33
-rw-r--r--include/hw/intc/riscv_imsic.h68
-rw-r--r--include/hw/riscv/opentitan.h4
-rw-r--r--include/hw/riscv/virt.h41
-rw-r--r--include/qemu-common.h2
-rw-r--r--include/qemu/coroutine-tls.h165
-rw-r--r--include/qemu/cpuid.h20
-rw-r--r--include/qemu/hbitmap.h12
-rw-r--r--include/qemu/job.h22
-rw-r--r--include/qemu/main-loop.h42
-rw-r--r--include/qemu/memalign.h61
-rw-r--r--include/qemu/osdep.h31
-rw-r--r--include/qemu/rcu.h7
-rw-r--r--include/qemu/typedefs.h2
-rw-r--r--include/qemu/xattr.h4
-rw-r--r--include/sysemu/accel-ops.h3
-rw-r--r--include/sysemu/arch_init.h2
-rw-r--r--include/sysemu/block-backend-common.h102
-rw-r--r--include/sysemu/block-backend-global-state.h116
-rw-r--r--include/sysemu/block-backend-io.h161
-rw-r--r--include/sysemu/block-backend.h269
-rw-r--r--include/sysemu/blockdev.h13
-rw-r--r--include/sysemu/hax.h18
-rw-r--r--include/sysemu/hw_accel.h5
-rw-r--r--include/sysemu/kvm.h6
-rw-r--r--include/sysemu/memory_mapping.h5
-rw-r--r--include/sysemu/os-posix.h1
-rw-r--r--include/sysemu/os-win32.h8
-rw-r--r--include/tcg/tcg-opc.h3
-rw-r--r--include/tcg/tcg.h5
-rw-r--r--job.c10
-rw-r--r--meson.build25
-rw-r--r--migration/block.c2
-rw-r--r--migration/migration.c14
-rw-r--r--migration/savevm.c8
-rw-r--r--monitor/qmp-cmds.c2
-rw-r--r--nbd/server.c1
-rw-r--r--net/l2tpv3.c2
-rw-r--r--os-posix.c6
-rw-r--r--pc-bios/bios-256k.binbin262144 -> 262144 bytes
-rw-r--r--pc-bios/bios-microvm.binbin131072 -> 131072 bytes
-rw-r--r--pc-bios/bios.binbin131072 -> 131072 bytes
-rw-r--r--pc-bios/vgabios-ati.binbin39936 -> 39936 bytes
-rw-r--r--pc-bios/vgabios-bochs-display.binbin28672 -> 28672 bytes
-rw-r--r--pc-bios/vgabios-cirrus.binbin39424 -> 39424 bytes
-rw-r--r--pc-bios/vgabios-qxl.binbin39424 -> 39424 bytes
-rw-r--r--pc-bios/vgabios-ramfb.binbin28672 -> 28672 bytes
-rw-r--r--pc-bios/vgabios-stdvga.binbin39424 -> 39424 bytes
-rw-r--r--pc-bios/vgabios-virtio.binbin39424 -> 39424 bytes
-rw-r--r--pc-bios/vgabios-vmware.binbin39424 -> 39424 bytes
-rw-r--r--pc-bios/vgabios.binbin38912 -> 38912 bytes
-rw-r--r--plugins/loader.c1
-rw-r--r--python/qemu/aqmp/legacy.py7
-rw-r--r--python/qemu/aqmp/protocol.py381
-rw-r--r--python/tests/protocol.py45
-rw-r--r--qapi/block-core.json14
-rw-r--r--qemu-img.c1
-rw-r--r--qemu-io-cmds.c1
-rw-r--r--qom/object.c1
m---------roms/seabios0
-rwxr-xr-xscripts/qmp/qmp-shell-wrap2
-rw-r--r--softmmu/arch_init.c9
-rw-r--r--softmmu/cpu-timers.c1
-rw-r--r--softmmu/cpus.c37
-rw-r--r--softmmu/globals.c2
-rw-r--r--softmmu/memory_mapping.c1
-rw-r--r--softmmu/meson.build24
-rw-r--r--softmmu/physmem.c8
-rw-r--r--softmmu/qdev-monitor.c11
-rw-r--r--softmmu/vl.c5
-rw-r--r--storage-daemon/qemu-storage-daemon.c58
-rw-r--r--stubs/iothread-lock-block.c8
-rw-r--r--stubs/meson.build3
-rw-r--r--target/alpha/cpu-qom.h3
-rw-r--r--target/alpha/cpu.h11
-rw-r--r--target/alpha/translate.c1
-rw-r--r--target/arm/cpu-qom.h3
-rw-r--r--target/arm/cpu.c6
-rw-r--r--target/arm/cpu.h12
-rw-r--r--target/arm/cpu64.c24
-rw-r--r--target/arm/hvf_arm.h2
-rw-r--r--target/arm/translate-neon.c13
-rw-r--r--target/avr/cpu-qom.h3
-rw-r--r--target/avr/cpu.h13
-rw-r--r--target/cris/cpu-qom.h3
-rw-r--r--target/cris/cpu.h7
-rw-r--r--target/hexagon/cpu.h23
-rw-r--r--target/hppa/cpu-qom.h3
-rw-r--r--target/hppa/cpu.h12
-rw-r--r--target/i386/cpu-qom.h3
-rw-r--r--target/i386/cpu.h7
-rw-r--r--target/i386/hax/hax-all.c11
-rw-r--r--target/i386/hvf/hvf.c1
-rw-r--r--target/i386/hvf/x86_emu.c110
-rw-r--r--target/i386/hvf/x86_emu.h22
-rw-r--r--target/i386/kvm/kvm.c1
-rw-r--r--target/i386/nvmm/nvmm-all.c14
-rw-r--r--target/i386/tcg/sysemu/excp_helper.c1
-rw-r--r--target/i386/tcg/sysemu/misc_helper.c1
-rw-r--r--target/i386/whpx/whpx-accel-ops.c6
-rw-r--r--target/i386/whpx/whpx-all.c18
-rw-r--r--target/m68k/cpu-qom.h3
-rw-r--r--target/m68k/cpu.h7
-rw-r--r--target/microblaze/cpu-qom.h3
-rw-r--r--target/microblaze/cpu.h9
-rw-r--r--target/microblaze/mmu.h2
-rw-r--r--target/mips/cpu-qom.h3
-rw-r--r--target/mips/cpu.h10
-rw-r--r--target/mips/internal.h15
-rw-r--r--target/nios2/cpu.c10
-rw-r--r--target/nios2/cpu.h10
-rw-r--r--target/nios2/helper.h6
-rw-r--r--target/nios2/meson.build3
-rw-r--r--target/nios2/mmu.c257
-rw-r--r--target/nios2/mmu.h3
-rw-r--r--target/nios2/op_helper.c29
-rw-r--r--target/nios2/trace-events10
-rw-r--r--target/nios2/translate.c73
-rw-r--r--target/openrisc/cpu.h17
-rw-r--r--target/ppc/cpu-qom.h5
-rw-r--r--target/ppc/cpu.h7
-rw-r--r--target/ppc/fpu_helper.c58
-rw-r--r--target/ppc/int_helper.c28
-rw-r--r--target/ppc/translate/vmx-impl.c.inc42
-rw-r--r--target/ppc/translate/vsx-impl.c.inc71
-rw-r--r--target/riscv/cpu.c17
-rw-r--r--target/riscv/cpu.h15
-rw-r--r--target/riscv/cpu_helper.c6
-rw-r--r--target/riscv/csr.c26
-rw-r--r--target/riscv/fpu_helper.c178
-rw-r--r--target/riscv/helper.h4
-rw-r--r--target/riscv/insn_trans/trans_rvb.c.inc8
-rw-r--r--target/riscv/insn_trans/trans_rvd.c.inc285
-rw-r--r--target/riscv/insn_trans/trans_rvf.c.inc314
-rw-r--r--target/riscv/insn_trans/trans_rvzfh.c.inc332
-rw-r--r--target/riscv/internals.h32
-rw-r--r--target/riscv/pmp.h2
-rw-r--r--target/riscv/translate.c149
-rw-r--r--target/rx/cpu-qom.h5
-rw-r--r--target/rx/cpu.h6
-rw-r--r--target/s390x/cpu-qom.h7
-rw-r--r--target/s390x/cpu.h7
-rw-r--r--target/sh4/cpu-qom.h3
-rw-r--r--target/sh4/cpu.h7
-rw-r--r--target/sparc/cpu-qom.h3
-rw-r--r--target/sparc/cpu.h9
-rw-r--r--target/tricore/cpu-qom.h3
-rw-r--r--target/tricore/cpu.h10
-rw-r--r--target/xtensa/cpu-qom.h3
-rw-r--r--target/xtensa/cpu.h13
-rw-r--r--tcg/aarch64/tcg-target.h3
-rw-r--r--tcg/arm/tcg-target.h3
-rw-r--r--tcg/i386/tcg-target-con-set.h1
-rw-r--r--tcg/i386/tcg-target.c.inc385
-rw-r--r--tcg/i386/tcg-target.h17
-rw-r--r--tcg/i386/tcg-target.opc.h3
-rw-r--r--tcg/optimize.c20
-rw-r--r--tcg/ppc/tcg-target.c.inc15
-rw-r--r--tcg/ppc/tcg-target.h3
-rw-r--r--tcg/region.c1
-rw-r--r--tcg/s390x/tcg-target.c.inc17
-rw-r--r--tcg/s390x/tcg-target.h3
-rw-r--r--tcg/tcg-op-vec.c27
-rw-r--r--tcg/tcg.c6
-rw-r--r--tcg/tci/tcg-target.c.inc2
-rw-r--r--tests/avocado/avocado_qemu/__init__.py2
-rw-r--r--tests/avocado/boot_linux.py2
-rw-r--r--tests/bench/atomic_add-bench.c1
-rw-r--r--tests/bench/qht-bench.c1
-rwxr-xr-xtests/check-block.sh12
-rwxr-xr-xtests/qemu-iotests/0401
-rwxr-xr-xtests/qemu-iotests/185190
-rw-r--r--tests/qemu-iotests/185.out48
-rw-r--r--tests/qemu-iotests/257.out224
-rwxr-xr-xtests/qemu-iotests/2712
-rwxr-xr-xtests/qemu-iotests/2968
-rw-r--r--tests/qemu-iotests/296.out17
-rw-r--r--tests/qemu-iotests/common.filter65
-rw-r--r--tests/qemu-iotests/common.rc49
-rw-r--r--tests/qemu-iotests/iotests.py40
-rw-r--r--tests/qemu-iotests/testenv.py5
-rw-r--r--tests/qemu-iotests/testrunner.py19
-rwxr-xr-xtests/qemu-iotests/tests/graph-changes-while-io91
-rw-r--r--tests/qemu-iotests/tests/graph-changes-while-io.out5
-rwxr-xr-xtests/qemu-iotests/tests/image-fleecing185
-rw-r--r--tests/qemu-iotests/tests/image-fleecing.out221
-rw-r--r--tests/qtest/virtio-9p-test.c2
-rwxr-xr-xtests/tcg/configure.sh4
-rw-r--r--tests/tcg/ppc64le/bcdsub.c144
-rw-r--r--tests/tcg/ppc64le/mtfsf.c19
-rw-r--r--tests/tcg/ppc64le/non_signalling_xscv.c16
-rw-r--r--tests/tcg/s390x/exrl-trt.c8
-rw-r--r--tests/tcg/s390x/exrl-trtr.c8
-rw-r--r--tests/tcg/s390x/mie3-mvcrl.c10
-rw-r--r--tests/tcg/s390x/mie3-sel.c35
-rw-r--r--tests/tcg/s390x/mvc.c4
-rw-r--r--tests/tcg/s390x/mvo.c4
-rw-r--r--tests/tcg/s390x/pack.c2
-rw-r--r--tests/unit/ptimer-test-stubs.c1
-rw-r--r--tests/unit/rcutorture.c10
-rw-r--r--tests/unit/test-block-iothread.c8
-rw-r--r--tests/unit/test-rcu-list.c4
-rwxr-xr-xtests/vm/haiku.x86_648
-rw-r--r--ui/clipboard.c6
-rw-r--r--ui/cocoa.m116
-rw-r--r--ui/console-gl.c4
-rw-r--r--ui/console.c29
-rw-r--r--util/async.c12
-rw-r--r--util/atomic64.c1
-rw-r--r--util/hbitmap.c33
-rw-r--r--util/memalign.c92
-rw-r--r--util/meson.build1
-rw-r--r--util/osdep.c21
-rw-r--r--util/oslib-posix.c46
-rw-r--r--util/oslib-win32.c35
-rw-r--r--util/qht.c1
-rw-r--r--util/rcu.c10
367 files changed, 12087 insertions, 5534 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 83a9f79c1c..38d1ac8803 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2514,9 +2514,12 @@ F: block/stream.c
F: block/mirror.c
F: qapi/job.json
F: block/block-copy.c
-F: include/block/block-copy.c
+F: include/block/block-copy.h
+F: block/reqlist.c
+F: include/block/reqlist.h
F: block/copy-before-write.h
F: block/copy-before-write.c
+F: block/snapshot-access.c
F: include/block/aio_task.h
F: block/aio_task.c
F: util/qemu-co-shared-resource.c
@@ -3607,7 +3610,8 @@ FreeBSD Hosted Continuous Integration
M: Ed Maste <emaste@freebsd.org>
M: Li-Wen Hsu <lwhsu@freebsd.org>
S: Maintained
-F: .cirrus.yml
+F: .gitlab-ci.d/cirrus/freebsd*
+F: tests/vm/freebsd
W: https://cirrus-ci.com/github/qemu/qemu
Windows Hosted Continuous Integration
diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c
index 7516c67a3f..c4244a23c6 100644
--- a/accel/kvm/kvm-accel-ops.c
+++ b/accel/kvm/kvm-accel-ops.c
@@ -74,11 +74,23 @@ static void kvm_start_vcpu_thread(CPUState *cpu)
cpu, QEMU_THREAD_JOINABLE);
}
+static bool kvm_vcpu_thread_is_idle(CPUState *cpu)
+{
+ return !kvm_halt_in_kernel();
+}
+
+static bool kvm_cpus_are_resettable(void)
+{
+ return !kvm_enabled() || kvm_cpu_check_are_resettable();
+}
+
static void kvm_accel_ops_class_init(ObjectClass *oc, void *data)
{
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
ops->create_vcpu_thread = kvm_start_vcpu_thread;
+ ops->cpu_thread_is_idle = kvm_vcpu_thread_is_idle;
+ ops->cpus_are_resettable = kvm_cpus_are_resettable;
ops->synchronize_post_reset = kvm_cpu_synchronize_post_reset;
ops->synchronize_post_init = kvm_cpu_synchronize_post_init;
ops->synchronize_state = kvm_cpu_synchronize_state;
diff --git a/accel/meson.build b/accel/meson.build
index dfd808d2c8..b9a963cf80 100644
--- a/accel/meson.build
+++ b/accel/meson.build
@@ -2,12 +2,14 @@ specific_ss.add(files('accel-common.c'))
softmmu_ss.add(files('accel-softmmu.c'))
user_ss.add(files('accel-user.c'))
-subdir('hvf')
-subdir('qtest')
-subdir('kvm')
subdir('tcg')
-subdir('xen')
-subdir('stubs')
+if have_system
+ subdir('hvf')
+ subdir('qtest')
+ subdir('kvm')
+ subdir('xen')
+ subdir('stubs')
+endif
dummy_ss = ss.source_set()
dummy_ss.add(files(
diff --git a/accel/qtest/qtest.c b/accel/qtest/qtest.c
index 7e6b8110d5..f6056ac836 100644
--- a/accel/qtest/qtest.c
+++ b/accel/qtest/qtest.c
@@ -20,7 +20,6 @@
#include "qemu/accel.h"
#include "sysemu/qtest.h"
#include "sysemu/cpus.h"
-#include "sysemu/cpu-timers.h"
#include "qemu/guest-random.h"
#include "qemu/main-loop.h"
#include "hw/core/cpu.h"
diff --git a/accel/stubs/hax-stub.c b/accel/stubs/hax-stub.c
index 49077f88e3..2fe31aaa9a 100644
--- a/accel/stubs/hax-stub.c
+++ b/accel/stubs/hax-stub.c
@@ -16,6 +16,8 @@
#include "qemu/osdep.h"
#include "sysemu/hax.h"
+bool hax_allowed;
+
int hax_sync_vcpus(void)
{
return 0;
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index 5319573e00..7e0fb884b9 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -12,10 +12,7 @@
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
-
-#ifndef CONFIG_USER_ONLY
#include "hw/pci/msi.h"
-#endif
KVMState *kvm_state;
bool kvm_kernel_irqchip;
@@ -80,7 +77,6 @@ int kvm_on_sigbus(int code, void *addr)
return 1;
}
-#ifndef CONFIG_USER_ONLY
int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
{
return -ENOSYS;
@@ -152,4 +148,3 @@ bool kvm_dirty_ring_enabled(void)
{
return false;
}
-#endif
diff --git a/accel/stubs/meson.build b/accel/stubs/meson.build
index 12dd1539af..0249b9258f 100644
--- a/accel/stubs/meson.build
+++ b/accel/stubs/meson.build
@@ -1,4 +1,7 @@
-specific_ss.add(when: 'CONFIG_HAX', if_false: files('hax-stub.c'))
-specific_ss.add(when: 'CONFIG_XEN', if_false: files('xen-stub.c'))
-specific_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c'))
-specific_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c'))
+sysemu_stubs_ss = ss.source_set()
+sysemu_stubs_ss.add(when: 'CONFIG_HAX', if_false: files('hax-stub.c'))
+sysemu_stubs_ss.add(when: 'CONFIG_XEN', if_false: files('xen-stub.c'))
+sysemu_stubs_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c'))
+sysemu_stubs_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c'))
+
+specific_ss.add_all(when: ['CONFIG_SOFTMMU'], if_true: sysemu_stubs_ss)
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index c68270f794..c997c2e8e0 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -422,7 +422,7 @@ static void cpu_exec_exit(CPUState *cpu)
void cpu_exec_step_atomic(CPUState *cpu)
{
- CPUArchState *env = (CPUArchState *)cpu->env_ptr;
+ CPUArchState *env = cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags, cflags;
@@ -532,7 +532,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
struct tb_desc desc;
uint32_t h;
- desc.env = (CPUArchState *)cpu->env_ptr;
+ desc.env = cpu->env_ptr;
desc.cs_base = cs_base;
desc.flags = flags;
desc.cflags = cflags;
diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c
index ea42d1d51b..bdaf2c943b 100644
--- a/accel/tcg/tcg-accel-ops-icount.c
+++ b/accel/tcg/tcg-accel-ops-icount.c
@@ -27,6 +27,7 @@
#include "qemu-common.h"
#include "sysemu/tcg.h"
#include "sysemu/replay.h"
+#include "sysemu/cpu-timers.h"
#include "qemu/main-loop.h"
#include "qemu/guest-random.h"
#include "exec/exec-all.h"
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index 29632bd4c0..dc421c8fd7 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -27,6 +27,7 @@
#include "qemu-common.h"
#include "sysemu/tcg.h"
#include "sysemu/replay.h"
+#include "sysemu/cpu-timers.h"
#include "qemu/main-loop.h"
#include "qemu/notify.h"
#include "qemu/guest-random.h"
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index bf59f53dbc..a805fb6bdd 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -27,6 +27,7 @@
#include "qemu-common.h"
#include "sysemu/tcg.h"
#include "sysemu/replay.h"
+#include "sysemu/cpu-timers.h"
#include "qemu/main-loop.h"
#include "qemu/notify.h"
#include "qemu/guest-random.h"
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 1a8e8390bd..ea7dcad674 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -29,6 +29,7 @@
#include "qemu-common.h"
#include "sysemu/tcg.h"
#include "sysemu/replay.h"
+#include "sysemu/cpu-timers.h"
#include "qemu/main-loop.h"
#include "qemu/guest-random.h"
#include "exec/exec-all.h"
diff --git a/audio/alsaaudio.c b/audio/alsaaudio.c
index 2b9789e647..b04716a6cc 100644
--- a/audio/alsaaudio.c
+++ b/audio/alsaaudio.c
@@ -916,6 +916,7 @@ static struct audio_pcm_ops alsa_pcm_ops = {
.init_out = alsa_init_out,
.fini_out = alsa_fini_out,
.write = alsa_write,
+ .buffer_get_free = audio_generic_buffer_get_free,
.run_buffer_out = audio_generic_run_buffer_out,
.enable_out = alsa_enable_out,
diff --git a/audio/audio.c b/audio/audio.c
index dc28685d22..a88572e713 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -548,65 +548,45 @@ static size_t audio_pcm_hw_get_live_in(HWVoiceIn *hw)
return live;
}
-static void audio_pcm_hw_clip_out(HWVoiceOut *hw, void *pcm_buf, size_t len)
+static size_t audio_pcm_hw_conv_in(HWVoiceIn *hw, void *pcm_buf, size_t samples)
{
- size_t clipped = 0;
- size_t pos = hw->mix_buf->pos;
-
- while (len) {
- st_sample *src = hw->mix_buf->samples + pos;
- uint8_t *dst = advance(pcm_buf, clipped * hw->info.bytes_per_frame);
- size_t samples_till_end_of_buf = hw->mix_buf->size - pos;
- size_t samples_to_clip = MIN(len, samples_till_end_of_buf);
+ size_t conv = 0;
+ STSampleBuffer *conv_buf = hw->conv_buf;
- hw->clip(dst, src, samples_to_clip);
+ while (samples) {
+ uint8_t *src = advance(pcm_buf, conv * hw->info.bytes_per_frame);
+ size_t proc = MIN(samples, conv_buf->size - conv_buf->pos);
- pos = (pos + samples_to_clip) % hw->mix_buf->size;
- len -= samples_to_clip;
- clipped += samples_to_clip;
+ hw->conv(conv_buf->samples + conv_buf->pos, src, proc);
+ conv_buf->pos = (conv_buf->pos + proc) % conv_buf->size;
+ samples -= proc;
+ conv += proc;
}
+
+ return conv;
}
/*
* Soft voice (capture)
*/
-static size_t audio_pcm_sw_get_rpos_in(SWVoiceIn *sw)
-{
- HWVoiceIn *hw = sw->hw;
- ssize_t live = hw->total_samples_captured - sw->total_hw_samples_acquired;
- ssize_t rpos;
-
- if (audio_bug(__func__, live < 0 || live > hw->conv_buf->size)) {
- dolog("live=%zu hw->conv_buf->size=%zu\n", live, hw->conv_buf->size);
- return 0;
- }
-
- rpos = hw->conv_buf->pos - live;
- if (rpos >= 0) {
- return rpos;
- } else {
- return hw->conv_buf->size + rpos;
- }
-}
-
static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, size_t size)
{
HWVoiceIn *hw = sw->hw;
size_t samples, live, ret = 0, swlim, isamp, osamp, rpos, total = 0;
struct st_sample *src, *dst = sw->buf;
- rpos = audio_pcm_sw_get_rpos_in(sw) % hw->conv_buf->size;
-
live = hw->total_samples_captured - sw->total_hw_samples_acquired;
+ if (!live) {
+ return 0;
+ }
if (audio_bug(__func__, live > hw->conv_buf->size)) {
dolog("live_in=%zu hw->conv_buf->size=%zu\n", live, hw->conv_buf->size);
return 0;
}
+ rpos = audio_ring_posb(hw->conv_buf->pos, live, hw->conv_buf->size);
+
samples = size / sw->info.bytes_per_frame;
- if (!live) {
- return 0;
- }
swlim = (live * sw->ratio) >> 32;
swlim = MIN (swlim, samples);
@@ -632,7 +612,7 @@ static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, size_t size)
total += isamp;
}
- if (hw->pcm_ops && !hw->pcm_ops->volume_in) {
+ if (!hw->pcm_ops->volume_in) {
mixeng_volume (sw->buf, ret, &sw->vol);
}
@@ -683,12 +663,38 @@ static size_t audio_pcm_hw_get_live_out (HWVoiceOut *hw, int *nb_live)
return 0;
}
+static size_t audio_pcm_hw_get_free(HWVoiceOut *hw)
+{
+ return (hw->pcm_ops->buffer_get_free ? hw->pcm_ops->buffer_get_free(hw) :
+ INT_MAX) / hw->info.bytes_per_frame;
+}
+
+static void audio_pcm_hw_clip_out(HWVoiceOut *hw, void *pcm_buf, size_t len)
+{
+ size_t clipped = 0;
+ size_t pos = hw->mix_buf->pos;
+
+ while (len) {
+ st_sample *src = hw->mix_buf->samples + pos;
+ uint8_t *dst = advance(pcm_buf, clipped * hw->info.bytes_per_frame);
+ size_t samples_till_end_of_buf = hw->mix_buf->size - pos;
+ size_t samples_to_clip = MIN(len, samples_till_end_of_buf);
+
+ hw->clip(dst, src, samples_to_clip);
+
+ pos = (pos + samples_to_clip) % hw->mix_buf->size;
+ len -= samples_to_clip;
+ clipped += samples_to_clip;
+ }
+}
+
/*
* Soft voice (playback)
*/
static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t size)
{
- size_t hwsamples, samples, isamp, osamp, wpos, live, dead, left, swlim, blck;
+ size_t hwsamples, samples, isamp, osamp, wpos, live, dead, left, blck;
+ size_t hw_free;
size_t ret = 0, pos = 0, total = 0;
if (!sw) {
@@ -711,27 +717,28 @@ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t size)
}
wpos = (sw->hw->mix_buf->pos + live) % hwsamples;
- samples = size / sw->info.bytes_per_frame;
dead = hwsamples - live;
- swlim = ((int64_t) dead << 32) / sw->ratio;
- swlim = MIN (swlim, samples);
- if (swlim) {
- sw->conv (sw->buf, buf, swlim);
+ hw_free = audio_pcm_hw_get_free(sw->hw);
+ hw_free = hw_free > live ? hw_free - live : 0;
+ samples = ((int64_t)MIN(dead, hw_free) << 32) / sw->ratio;
+ samples = MIN(samples, size / sw->info.bytes_per_frame);
+ if (samples) {
+ sw->conv(sw->buf, buf, samples);
- if (sw->hw->pcm_ops && !sw->hw->pcm_ops->volume_out) {
- mixeng_volume (sw->buf, swlim, &sw->vol);
+ if (!sw->hw->pcm_ops->volume_out) {
+ mixeng_volume(sw->buf, samples, &sw->vol);
}
}
- while (swlim) {
+ while (samples) {
dead = hwsamples - live;
left = hwsamples - wpos;
blck = MIN (dead, left);
if (!blck) {
break;
}
- isamp = swlim;
+ isamp = samples;
osamp = blck;
st_rate_flow_mix (
sw->rate,
@@ -741,7 +748,7 @@ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t size)
&osamp
);
ret += isamp;
- swlim -= isamp;
+ samples -= isamp;
pos += isamp;
live += osamp;
wpos = (wpos + osamp) % hwsamples;
@@ -1003,6 +1010,11 @@ static size_t audio_get_avail (SWVoiceIn *sw)
return (((int64_t) live << 32) / sw->ratio) * sw->info.bytes_per_frame;
}
+static size_t audio_sw_bytes_free(SWVoiceOut *sw, size_t free)
+{
+ return (((int64_t)free << 32) / sw->ratio) * sw->info.bytes_per_frame;
+}
+
static size_t audio_get_free(SWVoiceOut *sw)
{
size_t live, dead;
@@ -1022,13 +1034,11 @@ static size_t audio_get_free(SWVoiceOut *sw)
dead = sw->hw->mix_buf->size - live;
#ifdef DEBUG_OUT
- dolog ("%s: get_free live %zu dead %zu ret %" PRId64 "\n",
- SW_NAME (sw),
- live, dead, (((int64_t) dead << 32) / sw->ratio) *
- sw->info.bytes_per_frame);
+ dolog("%s: get_free live %zu dead %zu sw_bytes %zu\n",
+ SW_NAME(sw), live, dead, audio_sw_bytes_free(sw, dead));
#endif
- return (((int64_t) dead << 32) / sw->ratio) * sw->info.bytes_per_frame;
+ return dead;
}
static void audio_capture_mix_and_clear(HWVoiceOut *hw, size_t rpos,
@@ -1132,9 +1142,27 @@ static void audio_run_out (AudioState *s)
}
while ((hw = audio_pcm_hw_find_any_enabled_out(s, hw))) {
- size_t played, live, prev_rpos, free;
+ size_t played, live, prev_rpos;
+ size_t hw_free = audio_pcm_hw_get_free(hw);
int nb_live;
+ for (sw = hw->sw_head.lh_first; sw; sw = sw->entries.le_next) {
+ if (sw->active) {
+ size_t sw_free = audio_get_free(sw);
+ size_t free;
+
+ if (hw_free > sw->total_hw_samples_mixed) {
+ free = audio_sw_bytes_free(sw,
+ MIN(sw_free, hw_free - sw->total_hw_samples_mixed));
+ } else {
+ free = 0;
+ }
+ if (free > 0) {
+ sw->callback.fn(sw->callback.opaque, free);
+ }
+ }
+ }
+
live = audio_pcm_hw_get_live_out (hw, &nb_live);
if (!nb_live) {
live = 0;
@@ -1163,14 +1191,6 @@ static void audio_run_out (AudioState *s)
}
if (!live) {
- for (sw = hw->sw_head.lh_first; sw; sw = sw->entries.le_next) {
- if (sw->active) {
- free = audio_get_free (sw);
- if (free > 0) {
- sw->callback.fn (sw->callback.opaque, free);
- }
- }
- }
if (hw->pcm_ops->run_buffer_out) {
hw->pcm_ops->run_buffer_out(hw);
}
@@ -1211,13 +1231,6 @@ static void audio_run_out (AudioState *s)
if (!sw->total_hw_samples_mixed) {
sw->empty = 1;
}
-
- if (sw->active) {
- free = audio_get_free (sw);
- if (free > 0) {
- sw->callback.fn (sw->callback.opaque, free);
- }
- }
}
}
}
@@ -1225,7 +1238,6 @@ static void audio_run_out (AudioState *s)
static size_t audio_pcm_hw_run_in(HWVoiceIn *hw, size_t samples)
{
size_t conv = 0;
- STSampleBuffer *conv_buf = hw->conv_buf;
if (hw->pcm_ops->run_buffer_in) {
hw->pcm_ops->run_buffer_in(hw);
@@ -1241,11 +1253,7 @@ static size_t audio_pcm_hw_run_in(HWVoiceIn *hw, size_t samples)
break;
}
- proc = MIN(size / hw->info.bytes_per_frame,
- conv_buf->size - conv_buf->pos);
-
- hw->conv(conv_buf->samples + conv_buf->pos, buf, proc);
- conv_buf->pos = (conv_buf->pos + proc) % conv_buf->size;
+ proc = audio_pcm_hw_conv_in(hw, buf, size / hw->info.bytes_per_frame);
samples -= proc;
conv += proc;
@@ -1394,12 +1402,10 @@ void audio_generic_run_buffer_in(HWVoiceIn *hw)
void *audio_generic_get_buffer_in(HWVoiceIn *hw, size_t *size)
{
- ssize_t start = (ssize_t)hw->pos_emul - hw->pending_emul;
+ size_t start;
- if (start < 0) {
- start += hw->size_emul;
- }
- assert(start >= 0 && start < hw->size_emul);
+ start = audio_ring_posb(hw->pos_emul, hw->pending_emul, hw->size_emul);
+ assert(start < hw->size_emul);
*size = MIN(*size, hw->pending_emul);
*size = MIN(*size, hw->size_emul - start);
@@ -1412,16 +1418,22 @@ void audio_generic_put_buffer_in(HWVoiceIn *hw, void *buf, size_t size)
hw->pending_emul -= size;
}
+size_t audio_generic_buffer_get_free(HWVoiceOut *hw)
+{
+ if (hw->buf_emul) {
+ return hw->size_emul - hw->pending_emul;
+ } else {
+ return hw->samples * hw->info.bytes_per_frame;
+ }
+}
+
void audio_generic_run_buffer_out(HWVoiceOut *hw)
{
while (hw->pending_emul) {
- size_t write_len, written;
- ssize_t start = ((ssize_t) hw->pos_emul) - hw->pending_emul;
+ size_t write_len, written, start;
- if (start < 0) {
- start += hw->size_emul;
- }
- assert(start >= 0 && start < hw->size_emul);
+ start = audio_ring_posb(hw->pos_emul, hw->pending_emul, hw->size_emul);
+ assert(start < hw->size_emul);
write_len = MIN(hw->pending_emul, hw->size_emul - start);
@@ -1462,6 +1474,12 @@ size_t audio_generic_write(HWVoiceOut *hw, void *buf, size_t size)
{
size_t total = 0;
+ if (hw->pcm_ops->buffer_get_free) {
+ size_t free = hw->pcm_ops->buffer_get_free(hw);
+
+ size = MIN(size, free);
+ }
+
while (total < size) {
size_t dst_size = size - total;
size_t copy_size, proc;
@@ -1821,6 +1839,7 @@ void AUD_remove_card (QEMUSoundCard *card)
g_free (card->name);
}
+static struct audio_pcm_ops capture_pcm_ops;
CaptureVoiceOut *AUD_add_capture(
AudioState *s,
@@ -1866,6 +1885,7 @@ CaptureVoiceOut *AUD_add_capture(
hw = &cap->hw;
hw->s = s;
+ hw->pcm_ops = &capture_pcm_ops;
QLIST_INIT (&hw->sw_head);
QLIST_INIT (&cap->cb_head);
diff --git a/audio/audio_int.h b/audio/audio_int.h
index 428a091d05..2a6914d2aa 100644
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@@ -162,9 +162,13 @@ struct audio_pcm_ops {
size_t (*write) (HWVoiceOut *hw, void *buf, size_t size);
void (*run_buffer_out)(HWVoiceOut *hw);
/*
+ * Get the free output buffer size. This is an upper limit. The size
+ * returned by function get_buffer_out may be smaller.
+ */
+ size_t (*buffer_get_free)(HWVoiceOut *hw);
+ /*
* get a buffer that can later be passed to put_buffer_out; optional
* returns the buffer, and writes its size to size (in bytes)
- * this is unrelated to the above buffer_size_out function
*/
void *(*get_buffer_out)(HWVoiceOut *hw, size_t *size);
/*
@@ -190,6 +194,7 @@ void audio_generic_run_buffer_in(HWVoiceIn *hw);
void *audio_generic_get_buffer_in(HWVoiceIn *hw, size_t *size);
void audio_generic_put_buffer_in(HWVoiceIn *hw, void *buf, size_t size);
void audio_generic_run_buffer_out(HWVoiceOut *hw);
+size_t audio_generic_buffer_get_free(HWVoiceOut *hw);
void *audio_generic_get_buffer_out(HWVoiceOut *hw, size_t *size);
size_t audio_generic_put_buffer_out(HWVoiceOut *hw, void *buf, size_t size);
size_t audio_generic_write(HWVoiceOut *hw, void *buf, size_t size);
@@ -266,6 +271,19 @@ static inline size_t audio_ring_dist(size_t dst, size_t src, size_t len)
return (dst >= src) ? (dst - src) : (len - src + dst);
}
+/**
+ * audio_ring_posb() - returns new position in ringbuffer in backward
+ * direction at given distance
+ *
+ * @pos: current position in ringbuffer
+ * @dist: distance in ringbuffer to walk in reverse direction
+ * @len: size of ringbuffer
+ */
+static inline size_t audio_ring_posb(size_t pos, size_t dist, size_t len)
+{
+ return pos >= dist ? pos - dist : len - dist + pos;
+}
+
#define dolog(fmt, ...) AUD_log(AUDIO_CAP, fmt, ## __VA_ARGS__)
#ifdef DEBUG
diff --git a/audio/coreaudio.c b/audio/coreaudio.c
index d8a21d3e50..0f19d0ce01 100644
--- a/audio/coreaudio.c
+++ b/audio/coreaudio.c
@@ -283,6 +283,7 @@ static int coreaudio_buf_unlock (coreaudioVoiceOut *core, const char *fn_name)
coreaudio_buf_unlock(core, "coreaudio_" #name); \
return ret; \
}
+COREAUDIO_WRAPPER_FUNC(buffer_get_free, size_t, (HWVoiceOut *hw), (hw))
COREAUDIO_WRAPPER_FUNC(get_buffer_out, void *, (HWVoiceOut *hw, size_t *size),
(hw, size))
COREAUDIO_WRAPPER_FUNC(put_buffer_out, size_t,
@@ -333,12 +334,10 @@ static OSStatus audioDeviceIOProc(
len = frameCount * hw->info.bytes_per_frame;
while (len) {
- size_t write_len;
- ssize_t start = ((ssize_t) hw->pos_emul) - hw->pending_emul;
- if (start < 0) {
- start += hw->size_emul;
- }
- assert(start >= 0 && start < hw->size_emul);
+ size_t write_len, start;
+
+ start = audio_ring_posb(hw->pos_emul, hw->pending_emul, hw->size_emul);
+ assert(start < hw->size_emul);
write_len = MIN(MIN(hw->pending_emul, len),
hw->size_emul - start);
@@ -604,6 +603,8 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as,
coreaudio_playback_logerr(status,
"Could not remove voice property change listener\n");
}
+
+ return -1;
}
return 0;
@@ -654,6 +655,8 @@ static struct audio_pcm_ops coreaudio_pcm_ops = {
.fini_out = coreaudio_fini_out,
/* wrapper for audio_generic_write */
.write = coreaudio_write,
+ /* wrapper for audio_generic_buffer_get_free */
+ .buffer_get_free = coreaudio_buffer_get_free,
/* wrapper for audio_generic_get_buffer_out */
.get_buffer_out = coreaudio_get_buffer_out,
/* wrapper for audio_generic_put_buffer_out */
diff --git a/audio/dsoundaudio.c b/audio/dsoundaudio.c
index 3dd2c4d4a6..231f3e65b3 100644
--- a/audio/dsoundaudio.c
+++ b/audio/dsoundaudio.c
@@ -427,22 +427,18 @@ static void dsound_enable_out(HWVoiceOut *hw, bool enable)
}
}
-static void *dsound_get_buffer_out(HWVoiceOut *hw, size_t *size)
+static size_t dsound_buffer_get_free(HWVoiceOut *hw)
{
DSoundVoiceOut *ds = (DSoundVoiceOut *) hw;
LPDIRECTSOUNDBUFFER dsb = ds->dsound_buffer;
HRESULT hr;
- DWORD ppos, wpos, act_size;
- size_t req_size;
- int err;
- void *ret;
+ DWORD ppos, wpos;
hr = IDirectSoundBuffer_GetCurrentPosition(
dsb, &ppos, ds->first_time ? &wpos : NULL);
if (FAILED(hr)) {
dsound_logerr(hr, "Could not get playback buffer position\n");
- *size = 0;
- return NULL;
+ return 0;
}
if (ds->first_time) {
@@ -450,13 +446,20 @@ static void *dsound_get_buffer_out(HWVoiceOut *hw, size_t *size)
ds->first_time = false;
}
- req_size = audio_ring_dist(ppos, hw->pos_emul, hw->size_emul);
- req_size = MIN(req_size, hw->size_emul - hw->pos_emul);
+ return audio_ring_dist(ppos, hw->pos_emul, hw->size_emul);
+}
- if (req_size == 0) {
- *size = 0;
- return NULL;
- }
+static void *dsound_get_buffer_out(HWVoiceOut *hw, size_t *size)
+{
+ DSoundVoiceOut *ds = (DSoundVoiceOut *)hw;
+ LPDIRECTSOUNDBUFFER dsb = ds->dsound_buffer;
+ DWORD act_size;
+ size_t req_size;
+ int err;
+ void *ret;
+
+ req_size = MIN(*size, hw->size_emul - hw->pos_emul);
+ assert(req_size > 0);
err = dsound_lock_out(dsb, &hw->info, hw->pos_emul, req_size, &ret, NULL,
&act_size, NULL, false, ds->s);
@@ -699,6 +702,7 @@ static struct audio_pcm_ops dsound_pcm_ops = {
.init_out = dsound_init_out,
.fini_out = dsound_fini_out,
.write = audio_generic_write,
+ .buffer_get_free = dsound_buffer_get_free,
.get_buffer_out = dsound_get_buffer_out,
.put_buffer_out = dsound_put_buffer_out,
.enable_out = dsound_enable_out,
diff --git a/audio/jackaudio.c b/audio/jackaudio.c
index 317009e936..bf757250b5 100644
--- a/audio/jackaudio.c
+++ b/audio/jackaudio.c
@@ -483,8 +483,8 @@ static int qjack_client_init(QJackClient *c)
c->buffersize = 512;
}
- /* create a 2 period buffer */
- qjack_buffer_create(&c->fifo, c->nchannels, c->buffersize * 2);
+ /* create a 3 period buffer */
+ qjack_buffer_create(&c->fifo, c->nchannels, c->buffersize * 3);
qjack_client_connect_ports(c);
c->state = QJACK_STATE_RUNNING;
@@ -652,6 +652,7 @@ static struct audio_pcm_ops jack_pcm_ops = {
.init_out = qjack_init_out,
.fini_out = qjack_fini_out,
.write = qjack_write,
+ .buffer_get_free = audio_generic_buffer_get_free,
.run_buffer_out = audio_generic_run_buffer_out,
.enable_out = qjack_enable_out,
diff --git a/audio/noaudio.c b/audio/noaudio.c
index aac87dbc93..84a6bfbb1c 100644
--- a/audio/noaudio.c
+++ b/audio/noaudio.c
@@ -118,6 +118,7 @@ static struct audio_pcm_ops no_pcm_ops = {
.init_out = no_init_out,
.fini_out = no_fini_out,
.write = no_write,
+ .buffer_get_free = audio_generic_buffer_get_free,
.run_buffer_out = audio_generic_run_buffer_out,
.enable_out = no_enable_out,
diff --git a/audio/ossaudio.c b/audio/ossaudio.c
index 60eff66424..da9c232222 100644
--- a/audio/ossaudio.c
+++ b/audio/ossaudio.c
@@ -389,11 +389,23 @@ static void oss_run_buffer_out(HWVoiceOut *hw)
}
}
+static size_t oss_buffer_get_free(HWVoiceOut *hw)
+{
+ OSSVoiceOut *oss = (OSSVoiceOut *)hw;
+
+ if (oss->mmapped) {
+ return oss_get_available_bytes(oss);
+ } else {
+ return audio_generic_buffer_get_free(hw);
+ }
+}
+
static void *oss_get_buffer_out(HWVoiceOut *hw, size_t *size)
{
- OSSVoiceOut *oss = (OSSVoiceOut *) hw;
+ OSSVoiceOut *oss = (OSSVoiceOut *)hw;
+
if (oss->mmapped) {
- *size = MIN(oss_get_available_bytes(oss), hw->size_emul - hw->pos_emul);
+ *size = hw->size_emul - hw->pos_emul;
return hw->buf_emul + hw->pos_emul;
} else {
return audio_generic_get_buffer_out(hw, size);
@@ -750,6 +762,7 @@ static struct audio_pcm_ops oss_pcm_ops = {
.init_out = oss_init_out,
.fini_out = oss_fini_out,
.write = oss_write,
+ .buffer_get_free = oss_buffer_get_free,
.run_buffer_out = oss_run_buffer_out,
.get_buffer_out = oss_get_buffer_out,
.put_buffer_out = oss_put_buffer_out,
diff --git a/audio/paaudio.c b/audio/paaudio.c
index 75401d5391..a53ed85e0b 100644
--- a/audio/paaudio.c
+++ b/audio/paaudio.c
@@ -201,13 +201,11 @@ unlock_and_fail:
return 0;
}
-static void *qpa_get_buffer_out(HWVoiceOut *hw, size_t *size)
+static size_t qpa_buffer_get_free(HWVoiceOut *hw)
{
- PAVoiceOut *p = (PAVoiceOut *) hw;
+ PAVoiceOut *p = (PAVoiceOut *)hw;
PAConnection *c = p->g->conn;
- void *ret;
size_t l;
- int r;
pa_threaded_mainloop_lock(c->mainloop);
@@ -216,7 +214,6 @@ static void *qpa_get_buffer_out(HWVoiceOut *hw, size_t *size)
if (pa_stream_get_state(p->stream) != PA_STREAM_READY) {
/* wait for stream to become ready */
l = 0;
- ret = NULL;
goto unlock;
}
@@ -224,16 +221,33 @@ static void *qpa_get_buffer_out(HWVoiceOut *hw, size_t *size)
CHECK_SUCCESS_GOTO(c, l != (size_t) -1, unlock_and_fail,
"pa_stream_writable_size failed\n");
+unlock:
+ pa_threaded_mainloop_unlock(c->mainloop);
+ return l;
+
+unlock_and_fail:
+ pa_threaded_mainloop_unlock(c->mainloop);
+ return 0;
+}
+
+static void *qpa_get_buffer_out(HWVoiceOut *hw, size_t *size)
+{
+ PAVoiceOut *p = (PAVoiceOut *)hw;
+ PAConnection *c = p->g->conn;
+ void *ret;
+ int r;
+
+ pa_threaded_mainloop_lock(c->mainloop);
+
+ CHECK_DEAD_GOTO(c, p->stream, unlock_and_fail,
+ "pa_threaded_mainloop_lock failed\n");
+
*size = -1;
r = pa_stream_begin_write(p->stream, &ret, size);
CHECK_SUCCESS_GOTO(c, r >= 0, unlock_and_fail,
"pa_stream_begin_write failed\n");
-unlock:
pa_threaded_mainloop_unlock(c->mainloop);
- if (*size > l) {
- *size = l;
- }
return ret;
unlock_and_fail:
@@ -535,11 +549,8 @@ static int qpa_init_out(HWVoiceOut *hw, struct audsettings *as,
}
audio_pcm_init_info (&hw->info, &obt_as);
- /*
- * This is wrong. hw->samples counts in frames. hw->samples will be
- * number of channels times larger than expected.
- */
- hw->samples = audio_buffer_samples(
+ /* hw->samples counts in frames */
+ hw->samples = audio_buffer_frames(
qapi_AudiodevPaPerDirectionOptions_base(ppdo), &obt_as, 46440);
return 0;
@@ -587,11 +598,8 @@ static int qpa_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
}
audio_pcm_init_info (&hw->info, &obt_as);
- /*
- * This is wrong. hw->samples counts in frames. hw->samples will be
- * number of channels times larger than expected.
- */
- hw->samples = audio_buffer_samples(
+ /* hw->samples counts in frames */
+ hw->samples = audio_buffer_frames(
qapi_AudiodevPaPerDirectionOptions_base(ppdo), &obt_as, 46440);
return 0;
@@ -744,7 +752,7 @@ static int qpa_validate_per_direction_opts(Audiodev *dev,
{
if (!pdo->has_latency) {
pdo->has_latency = true;
- pdo->latency = 15000;
+ pdo->latency = 46440;
}
return 1;
}
@@ -901,6 +909,7 @@ static struct audio_pcm_ops qpa_pcm_ops = {
.init_out = qpa_init_out,
.fini_out = qpa_fini_out,
.write = qpa_write,
+ .buffer_get_free = qpa_buffer_get_free,
.get_buffer_out = qpa_get_buffer_out,
.put_buffer_out = qpa_put_buffer_out,
.volume_out = qpa_volume_out,
diff --git a/audio/sdlaudio.c b/audio/sdlaudio.c
index c68c62a3e4..797b47bbdd 100644
--- a/audio/sdlaudio.c
+++ b/audio/sdlaudio.c
@@ -224,12 +224,11 @@ static void sdl_callback_out(void *opaque, Uint8 *buf, int len)
/* dolog("callback_out: len=%d avail=%zu\n", len, hw->pending_emul); */
while (hw->pending_emul && len) {
- size_t write_len;
- ssize_t start = (ssize_t)hw->pos_emul - hw->pending_emul;
- if (start < 0) {
- start += hw->size_emul;
- }
- assert(start >= 0 && start < hw->size_emul);
+ size_t write_len, start;
+
+ start = audio_ring_posb(hw->pos_emul, hw->pending_emul,
+ hw->size_emul);
+ assert(start < hw->size_emul);
write_len = MIN(MIN(hw->pending_emul, len),
hw->size_emul - start);
@@ -310,6 +309,7 @@ static void sdl_callback_in(void *opaque, Uint8 *buf, int len)
SDL_UnlockAudioDevice(sdl->devid); \
}
+SDL_WRAPPER_FUNC(buffer_get_free, size_t, (HWVoiceOut *hw), (hw), Out)
SDL_WRAPPER_FUNC(get_buffer_out, void *, (HWVoiceOut *hw, size_t *size),
(hw, size), Out)
SDL_WRAPPER_FUNC(put_buffer_out, size_t,
@@ -347,11 +347,8 @@ static int sdl_init_out(HWVoiceOut *hw, struct audsettings *as,
req.freq = as->freq;
req.format = aud_to_sdlfmt (as->fmt);
req.channels = as->nchannels;
- /*
- * This is wrong. SDL samples are QEMU frames. The buffer size will be
- * the requested buffer size multiplied by the number of channels.
- */
- req.samples = audio_buffer_samples(
+ /* SDL samples are QEMU frames */
+ req.samples = audio_buffer_frames(
qapi_AudiodevSdlPerDirectionOptions_base(spdo), as, 11610);
req.callback = sdl_callback_out;
req.userdata = sdl;
@@ -472,6 +469,8 @@ static struct audio_pcm_ops sdl_pcm_ops = {
.fini_out = sdl_fini_out,
/* wrapper for audio_generic_write */
.write = sdl_write,
+ /* wrapper for audio_generic_buffer_get_free */
+ .buffer_get_free = sdl_buffer_get_free,
/* wrapper for audio_generic_get_buffer_out */
.get_buffer_out = sdl_get_buffer_out,
/* wrapper for audio_generic_put_buffer_out */
diff --git a/audio/wavaudio.c b/audio/wavaudio.c
index 20e6853f85..ac666335c7 100644
--- a/audio/wavaudio.c
+++ b/audio/wavaudio.c
@@ -197,6 +197,7 @@ static struct audio_pcm_ops wav_pcm_ops = {
.init_out = wav_init_out,
.fini_out = wav_fini_out,
.write = wav_write_out,
+ .buffer_get_free = audio_generic_buffer_get_free,
.run_buffer_out = audio_generic_run_buffer_out,
.enable_out = wav_enable_out,
};
diff --git a/block.c b/block.c
index b54d59d1fa..718e4cae8b 100644
--- a/block.c
+++ b/block.c
@@ -67,12 +67,15 @@
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+/* Protected by BQL */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
+/* Protected by BQL */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
+/* Protected by BQL */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
QLIST_HEAD_INITIALIZER(bdrv_drivers);
@@ -134,6 +137,7 @@ size_t bdrv_opt_mem_align(BlockDriverState *bs)
/* page size or 4k (hdd sector size) should be on the safe side */
return MAX(4096, qemu_real_host_page_size);
}
+ IO_CODE();
return bs->bl.opt_mem_alignment;
}
@@ -144,6 +148,7 @@ size_t bdrv_min_mem_align(BlockDriverState *bs)
/* page size or 4k (hdd sector size) should be on the safe side */
return MAX(4096, qemu_real_host_page_size);
}
+ IO_CODE();
return bs->bl.min_mem_alignment;
}
@@ -269,12 +274,15 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
* image is inactivated. */
bool bdrv_is_read_only(BlockDriverState *bs)
{
+ IO_CODE();
return !(bs->open_flags & BDRV_O_RDWR);
}
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
bool ignore_allow_rdw, Error **errp)
{
+ IO_CODE();
+
/* Do not set read_only if copy_on_read is enabled */
if (bs->copy_on_read && read_only) {
error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
@@ -308,6 +316,7 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
Error **errp)
{
int ret = 0;
+ IO_CODE();
if (!(bs->open_flags & BDRV_O_RDWR)) {
return 0;
@@ -384,12 +393,14 @@ static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp)
{
+ GLOBAL_STATE_CODE();
return bdrv_make_absolute_filename(bs, bs->backing_file, errp);
}
void bdrv_register(BlockDriver *bdrv)
{
assert(bdrv->format_name);
+ GLOBAL_STATE_CODE();
QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
@@ -398,6 +409,8 @@ BlockDriverState *bdrv_new(void)
BlockDriverState *bs;
int i;
+ GLOBAL_STATE_CODE();
+
bs = g_new0(BlockDriverState, 1);
QLIST_INIT(&bs->dirty_bitmaps);
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
@@ -425,6 +438,7 @@ BlockDriverState *bdrv_new(void)
static BlockDriver *bdrv_do_find_format(const char *format_name)
{
BlockDriver *drv1;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH(drv1, &bdrv_drivers, list) {
if (!strcmp(drv1->format_name, format_name)) {
@@ -440,6 +454,8 @@ BlockDriver *bdrv_find_format(const char *format_name)
BlockDriver *drv1;
int i;
+ GLOBAL_STATE_CODE();
+
drv1 = bdrv_do_find_format(format_name);
if (drv1) {
return drv1;
@@ -489,6 +505,7 @@ static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
+ GLOBAL_STATE_CODE();
return bdrv_format_is_whitelisted(drv->format_name, read_only);
}
@@ -512,6 +529,7 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque)
CreateCo *cco = opaque;
assert(cco->drv);
+ GLOBAL_STATE_CODE();
ret = cco->drv->bdrv_co_create_opts(cco->drv,
cco->filename, cco->opts, &local_err);
@@ -524,6 +542,8 @@ int bdrv_create(BlockDriver *drv, const char* filename,
{
int ret;
+ GLOBAL_STATE_CODE();
+
Coroutine *co;
CreateCo cco = {
.drv = drv,
@@ -578,6 +598,8 @@ static int64_t create_file_fallback_truncate(BlockBackend *blk,
int64_t size;
int ret;
+ GLOBAL_STATE_CODE();
+
ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
&local_err);
if (ret < 0 && ret != -ENOTSUP) {
@@ -616,6 +638,8 @@ static int create_file_fallback_zero_first_sector(BlockBackend *blk,
int64_t bytes_to_clear;
int ret;
+ GLOBAL_STATE_CODE();
+
bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
if (bytes_to_clear) {
ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
@@ -647,6 +671,8 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
Error *local_err = NULL;
int ret;
+ GLOBAL_STATE_CODE();
+
size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
@@ -699,6 +725,8 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
QDict *qdict;
int ret;
+ GLOBAL_STATE_CODE();
+
drv = bdrv_find_protocol(filename, true, errp);
if (drv == NULL) {
return -ENOENT;
@@ -743,6 +771,7 @@ int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
Error *local_err = NULL;
int ret;
+ IO_CODE();
assert(bs != NULL);
if (!bs->drv) {
@@ -768,6 +797,7 @@ void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs)
{
Error *local_err = NULL;
int ret;
+ IO_CODE();
if (!bs) {
return;
@@ -796,6 +826,7 @@ int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
BlockDriver *drv = bs->drv;
BlockDriverState *filtered = bdrv_filter_bs(bs);
+ GLOBAL_STATE_CODE();
if (drv && drv->bdrv_probe_blocksizes) {
return drv->bdrv_probe_blocksizes(bs, bsz);
@@ -816,6 +847,7 @@ int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
BlockDriver *drv = bs->drv;
BlockDriverState *filtered = bdrv_filter_bs(bs);
+ GLOBAL_STATE_CODE();
if (drv && drv->bdrv_probe_geometry) {
return drv->bdrv_probe_geometry(bs, geo);
@@ -870,6 +902,7 @@ static BlockDriver *find_hdev_driver(const char *filename)
{
int score_max = 0, score;
BlockDriver *drv = NULL, *d;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH(d, &bdrv_drivers, list) {
if (d->bdrv_probe_device) {
@@ -887,6 +920,7 @@ static BlockDriver *find_hdev_driver(const char *filename)
static BlockDriver *bdrv_do_find_protocol(const char *protocol)
{
BlockDriver *drv1;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH(drv1, &bdrv_drivers, list) {
if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
@@ -907,6 +941,7 @@ BlockDriver *bdrv_find_protocol(const char *filename,
const char *p;
int i;
+ GLOBAL_STATE_CODE();
/* TODO Drivers without bdrv_file_open must be specified explicitly */
/*
@@ -972,6 +1007,7 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
{
int score_max = 0, score;
BlockDriver *drv = NULL, *d;
+ IO_CODE();
QLIST_FOREACH(d, &bdrv_drivers, list) {
if (d->bdrv_probe) {
@@ -993,6 +1029,8 @@ static int find_image_format(BlockBackend *file, const char *filename,
uint8_t buf[BLOCK_PROBE_BUF_SIZE];
int ret = 0;
+ GLOBAL_STATE_CODE();
+
/* Return the raw BlockDriver * to scsi-generic devices or empty drives */
if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
*pdrv = &bdrv_raw;
@@ -1024,6 +1062,7 @@ static int find_image_format(BlockBackend *file, const char *filename,
int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
BlockDriver *drv = bs->drv;
+ IO_CODE();
if (!drv) {
return -ENOMEDIUM;
@@ -1058,6 +1097,7 @@ int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
static void bdrv_join_options(BlockDriverState *bs, QDict *options,
QDict *old_options)
{
+ GLOBAL_STATE_CODE();
if (bs->drv && bs->drv->bdrv_join_options) {
bs->drv->bdrv_join_options(options, old_options);
} else {
@@ -1074,6 +1114,7 @@ static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
BlockdevDetectZeroesOptions detect_zeroes =
qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value,
BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err);
+ GLOBAL_STATE_CODE();
g_free(value);
if (local_err) {
error_propagate(errp, local_err);
@@ -1189,6 +1230,7 @@ static void bdrv_child_cb_drained_end(BdrvChild *child,
static int bdrv_child_cb_inactivate(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
+ GLOBAL_STATE_CODE();
assert(bs->open_flags & BDRV_O_INACTIVE);
return 0;
}
@@ -1215,6 +1257,7 @@ static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx,
static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
int parent_flags, QDict *parent_options)
{
+ GLOBAL_STATE_CODE();
*child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
/* For temporary files, unconditional cache=unsafe is fine */
@@ -1235,6 +1278,7 @@ static void bdrv_backing_attach(BdrvChild *c)
BlockDriverState *parent = c->opaque;
BlockDriverState *backing_hd = c->bs;
+ GLOBAL_STATE_CODE();
assert(!parent->backing_blocker);
error_setg(&parent->backing_blocker,
"node is used as backing hd of '%s'",
@@ -1273,6 +1317,7 @@ static void bdrv_backing_detach(BdrvChild *c)
{
BlockDriverState *parent = c->opaque;
+ GLOBAL_STATE_CODE();
assert(parent->backing_blocker);
bdrv_op_unblock_all(c->bs, parent->backing_blocker);
error_free(parent->backing_blocker);
@@ -1285,6 +1330,7 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
BlockDriverState *parent = c->opaque;
bool read_only = bdrv_is_read_only(parent);
int ret;
+ GLOBAL_STATE_CODE();
if (read_only) {
ret = bdrv_reopen_set_read_only(parent, false, errp);
@@ -1316,6 +1362,7 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
int parent_flags, QDict *parent_options)
{
int flags = parent_flags;
+ GLOBAL_STATE_CODE();
/*
* First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
@@ -1391,6 +1438,7 @@ static void bdrv_child_cb_attach(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
+ assert_bdrv_graph_writable(bs);
QLIST_INSERT_HEAD(&bs->children, child, next);
if (child->role & BDRV_CHILD_COW) {
@@ -1410,6 +1458,7 @@ static void bdrv_child_cb_detach(BdrvChild *child)
bdrv_unapply_subtree_drain(child, bs);
+ assert_bdrv_graph_writable(bs);
QLIST_REMOVE(child, next);
}
@@ -1425,6 +1474,7 @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
{
BlockDriverState *bs = c->opaque;
+ IO_CODE();
return bdrv_get_aio_context(bs);
}
@@ -1447,12 +1497,14 @@ const BdrvChildClass child_of_bds = {
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
{
+ GLOBAL_STATE_CODE();
return c->klass->get_parent_aio_context(c);
}
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
int open_flags = flags;
+ GLOBAL_STATE_CODE();
/*
* Clear flags that are internal to the block layer before opening the
@@ -1465,6 +1517,8 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
static void update_flags_from_options(int *flags, QemuOpts *opts)
{
+ GLOBAL_STATE_CODE();
+
*flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);
if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
@@ -1486,6 +1540,7 @@ static void update_flags_from_options(int *flags, QemuOpts *opts)
static void update_options_from_flags(QDict *options, int flags)
{
+ GLOBAL_STATE_CODE();
if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
}
@@ -1507,6 +1562,7 @@ static void bdrv_assign_node_name(BlockDriverState *bs,
Error **errp)
{
char *gen_node_name = NULL;
+ GLOBAL_STATE_CODE();
if (!node_name) {
node_name = gen_node_name = id_generate(ID_BLOCK);
@@ -1551,6 +1607,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
{
Error *local_err = NULL;
int i, ret;
+ GLOBAL_STATE_CODE();
bdrv_assign_node_name(bs, node_name, &local_err);
if (local_err) {
@@ -1631,6 +1688,8 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
BlockDriverState *bs;
int ret;
+ GLOBAL_STATE_CODE();
+
bs = bdrv_new();
bs->open_flags = flags;
bs->options = options ?: qdict_new();
@@ -1656,6 +1715,7 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
int flags, Error **errp)
{
+ GLOBAL_STATE_CODE();
return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp);
}
@@ -1750,6 +1810,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
assert(bs->file == NULL);
assert(options != NULL && bs->options != options);
+ GLOBAL_STATE_CODE();
opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
if (!qemu_opts_absorb_qdict(opts, options, errp)) {
@@ -1875,6 +1936,7 @@ static QDict *parse_json_filename(const char *filename, Error **errp)
QObject *options_obj;
QDict *options;
int ret;
+ GLOBAL_STATE_CODE();
ret = strstart(filename, "json:", &filename);
assert(ret);
@@ -1902,6 +1964,7 @@ static void parse_json_protocol(QDict *options, const char **pfilename,
{
QDict *json_options;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
/* Parse json: pseudo-protocol */
if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
@@ -1936,6 +1999,8 @@ static int bdrv_fill_options(QDict **options, const char *filename,
BlockDriver *drv = NULL;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
/*
* Caution: while qdict_get_try_str() is fine, getting non-string
* types would require more care. When @options come from
@@ -2057,11 +2122,13 @@ static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
*/
bool bdrv_is_writable(BlockDriverState *bs)
{
+ IO_CODE();
return bdrv_is_writable_after_reopen(bs, NULL);
}
static char *bdrv_child_user_desc(BdrvChild *c)
{
+ GLOBAL_STATE_CODE();
return c->klass->get_parent_desc(c);
}
@@ -2078,6 +2145,7 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
assert(a->bs);
assert(a->bs == b->bs);
+ GLOBAL_STATE_CODE();
if ((b->perm & a->shared_perm) == b->perm) {
return true;
@@ -2101,6 +2169,7 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
{
BdrvChild *a, *b;
+ GLOBAL_STATE_CODE();
/*
* During the loop we'll look at each pair twice. That's correct because
@@ -2129,6 +2198,7 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
uint64_t *nperm, uint64_t *nshared)
{
assert(bs->drv && bs->drv->bdrv_child_perm);
+ GLOBAL_STATE_CODE();
bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
parent_perm, parent_shared,
nperm, nshared);
@@ -2155,6 +2225,8 @@ static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found,
BdrvChild *child;
g_autoptr(GHashTable) local_found = NULL;
+ GLOBAL_STATE_CODE();
+
if (!found) {
assert(!list);
found = local_found = g_hash_table_new(NULL, NULL);
@@ -2182,6 +2254,8 @@ static void bdrv_child_set_perm_abort(void *opaque)
{
BdrvChildSetPermState *s = opaque;
+ GLOBAL_STATE_CODE();
+
s->child->perm = s->old_perm;
s->child->shared_perm = s->old_shared_perm;
}
@@ -2195,6 +2269,7 @@ static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
uint64_t shared, Transaction *tran)
{
BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1);
+ GLOBAL_STATE_CODE();
*s = (BdrvChildSetPermState) {
.child = c,
@@ -2212,6 +2287,7 @@ static void bdrv_drv_set_perm_commit(void *opaque)
{
BlockDriverState *bs = opaque;
uint64_t cumulative_perms, cumulative_shared_perms;
+ GLOBAL_STATE_CODE();
if (bs->drv->bdrv_set_perm) {
bdrv_get_cumulative_perm(bs, &cumulative_perms,
@@ -2223,6 +2299,7 @@ static void bdrv_drv_set_perm_commit(void *opaque)
static void bdrv_drv_set_perm_abort(void *opaque)
{
BlockDriverState *bs = opaque;
+ GLOBAL_STATE_CODE();
if (bs->drv->bdrv_abort_perm_update) {
bs->drv->bdrv_abort_perm_update(bs);
@@ -2238,6 +2315,7 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm,
uint64_t shared_perm, Transaction *tran,
Error **errp)
{
+ GLOBAL_STATE_CODE();
if (!bs->drv) {
return 0;
}
@@ -2266,6 +2344,7 @@ typedef struct BdrvReplaceChildState {
static void bdrv_replace_child_commit(void *opaque)
{
BdrvReplaceChildState *s = opaque;
+ GLOBAL_STATE_CODE();
if (s->free_empty_child && !s->child->bs) {
bdrv_child_free(s->child);
@@ -2278,6 +2357,7 @@ static void bdrv_replace_child_abort(void *opaque)
BdrvReplaceChildState *s = opaque;
BlockDriverState *new_bs = s->child->bs;
+ GLOBAL_STATE_CODE();
/*
* old_bs reference is transparently moved from @s to s->child.
*
@@ -2374,6 +2454,7 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
BdrvChild *c;
int ret;
uint64_t cumulative_perms, cumulative_shared_perms;
+ GLOBAL_STATE_CODE();
bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
@@ -2442,6 +2523,7 @@ static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
{
int ret;
BlockDriverState *bs;
+ GLOBAL_STATE_CODE();
for ( ; list; list = list->next) {
bs = list->data;
@@ -2466,6 +2548,8 @@ void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
uint64_t cumulative_perms = 0;
uint64_t cumulative_shared_perms = BLK_PERM_ALL;
+ GLOBAL_STATE_CODE();
+
QLIST_FOREACH(c, &bs->parents, next_parent) {
cumulative_perms |= c->perm;
cumulative_shared_perms &= c->shared_perm;
@@ -2509,6 +2593,7 @@ static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp)
int ret;
Transaction *tran = tran_new();
g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
+ GLOBAL_STATE_CODE();
ret = bdrv_list_refresh_perms(list, NULL, tran, errp);
tran_finalize(tran, ret);
@@ -2523,6 +2608,8 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
Transaction *tran = tran_new();
int ret;
+ GLOBAL_STATE_CODE();
+
bdrv_child_set_perm(c, perm, shared, tran);
ret = bdrv_refresh_perms(c->bs, &local_err);
@@ -2553,6 +2640,8 @@ int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
uint64_t parent_perms, parent_shared;
uint64_t perms, shared;
+ GLOBAL_STATE_CODE();
+
bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared);
bdrv_child_perm(bs, c->bs, c, c->role, NULL,
parent_perms, parent_shared, &perms, &shared);
@@ -2571,6 +2660,7 @@ static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
+ GLOBAL_STATE_CODE();
*nperm = perm & DEFAULT_PERM_PASSTHROUGH;
*nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
}
@@ -2582,6 +2672,7 @@ static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
uint64_t *nperm, uint64_t *nshared)
{
assert(role & BDRV_CHILD_COW);
+ GLOBAL_STATE_CODE();
/*
* We want consistent read from backing files if the parent needs it.
@@ -2618,6 +2709,7 @@ static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
{
int flags;
+ GLOBAL_STATE_CODE();
assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
flags = bdrv_reopen_get_flags(reopen_queue, bs);
@@ -2694,6 +2786,7 @@ void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
+ GLOBAL_STATE_CODE();
if (role & BDRV_CHILD_FILTERED) {
assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
BDRV_CHILD_COW)));
@@ -2752,6 +2845,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
assert(!child->frozen);
assert(old_bs != new_bs);
+ GLOBAL_STATE_CODE();
if (old_bs && new_bs) {
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
@@ -2776,6 +2870,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
if (child->klass->detach) {
child->klass->detach(child);
}
+ assert_bdrv_graph_writable(old_bs);
QLIST_REMOVE(child, next_parent);
}
@@ -2785,6 +2880,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
}
if (new_bs) {
+ assert_bdrv_graph_writable(new_bs);
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
/*
@@ -2827,6 +2923,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
static void bdrv_child_free(BdrvChild *child)
{
assert(!child->bs);
+ GLOBAL_STATE_CODE();
assert(!child->next.le_prev); /* not in children list */
g_free(child->name);
@@ -2845,6 +2942,7 @@ static void bdrv_attach_child_common_abort(void *opaque)
BdrvChild *child = *s->child;
BlockDriverState *bs = child->bs;
+ GLOBAL_STATE_CODE();
/*
* Pass free_empty_child=false, because we still need the child
* for the AioContext operations on the parent below; those
@@ -2907,6 +3005,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs,
assert(child);
assert(*child == NULL);
assert(child_class->get_parent_desc);
+ GLOBAL_STATE_CODE();
new_child = g_new(BdrvChild, 1);
*new_child = (BdrvChild) {
@@ -2987,6 +3086,7 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
uint64_t perm, shared_perm;
assert(parent_bs->drv);
+ GLOBAL_STATE_CODE();
if (bdrv_recurse_has_child(child_bs, parent_bs)) {
error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle",
@@ -3012,6 +3112,7 @@ static void bdrv_detach_child(BdrvChild **childp)
{
BlockDriverState *old_bs = (*childp)->bs;
+ GLOBAL_STATE_CODE();
bdrv_replace_child_noperm(childp, NULL, true);
if (old_bs) {
@@ -3051,6 +3152,8 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
BdrvChild *child = NULL;
Transaction *tran = tran_new();
+ GLOBAL_STATE_CODE();
+
ret = bdrv_attach_child_common(child_bs, child_name, child_class,
child_role, perm, shared_perm, opaque,
&child, tran, errp);
@@ -3091,6 +3194,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BdrvChild *child = NULL;
Transaction *tran = tran_new();
+ GLOBAL_STATE_CODE();
+
ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class,
child_role, &child, tran, errp);
if (ret < 0) {
@@ -3117,6 +3222,8 @@ void bdrv_root_unref_child(BdrvChild *child)
{
BlockDriverState *child_bs;
+ GLOBAL_STATE_CODE();
+
child_bs = child->bs;
bdrv_detach_child(&child);
bdrv_unref(child_bs);
@@ -3191,6 +3298,7 @@ static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
/* Callers must ensure that child->frozen is false. */
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
{
+ GLOBAL_STATE_CODE();
if (child == NULL) {
return;
}
@@ -3203,6 +3311,7 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
{
BdrvChild *c;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c->klass->change_media) {
c->klass->change_media(c, load);
@@ -3253,6 +3362,8 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file;
BdrvChildRole role;
+ GLOBAL_STATE_CODE();
+
if (!parent_bs->drv) {
/*
* Node without drv is an object without a class :/. TODO: finally fix
@@ -3332,6 +3443,7 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs,
BlockDriverState *backing_hd,
Transaction *tran, Error **errp)
{
+ GLOBAL_STATE_CODE();
return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
}
@@ -3341,6 +3453,7 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
int ret;
Transaction *tran = tran_new();
+ GLOBAL_STATE_CODE();
bdrv_drained_begin(bs);
ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
@@ -3380,6 +3493,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
QDict *tmp_parent_options = NULL;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
if (bs->backing != NULL) {
goto free_exit;
}
@@ -3539,6 +3654,8 @@ BdrvChild *bdrv_open_child(const char *filename,
{
BlockDriverState *bs;
+ GLOBAL_STATE_CODE();
+
bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
child_role, allow_none, errp);
if (bs == NULL) {
@@ -3561,6 +3678,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
const char *reference = NULL;
Visitor *v = NULL;
+ GLOBAL_STATE_CODE();
+
if (ref->type == QTYPE_QSTRING) {
reference = ref->u.reference;
} else {
@@ -3603,6 +3722,8 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
BlockDriverState *bs_snapshot = NULL;
int ret;
+ GLOBAL_STATE_CODE();
+
/* if snapshot, we create a temporary backing file and open it
instead of opening 'filename' directly */
@@ -3690,6 +3811,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
assert(!child_class || !flags);
assert(!child_class == !parent);
+ GLOBAL_STATE_CODE();
if (reference) {
bool options_non_empty = options ? qdict_size(options) : false;
@@ -3958,6 +4080,8 @@ close_and_fail:
BlockDriverState *bdrv_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp)
{
+ GLOBAL_STATE_CODE();
+
return bdrv_open_inherit(filename, reference, options, flags, NULL,
NULL, 0, errp);
}
@@ -4074,6 +4198,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
* important to avoid graph changes between the recursive queuing here and
* bdrv_reopen_multiple(). */
assert(bs->quiesce_counter > 0);
+ GLOBAL_STATE_CODE();
if (bs_queue == NULL) {
bs_queue = g_new0(BlockReopenQueue, 1);
@@ -4212,12 +4337,15 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
QDict *options, bool keep_old_opts)
{
+ GLOBAL_STATE_CODE();
+
return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
NULL, 0, keep_old_opts);
}
void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
{
+ GLOBAL_STATE_CODE();
if (bs_queue) {
BlockReopenQueueEntry *bs_entry, *next;
QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
@@ -4259,6 +4387,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
assert(bs_queue != NULL);
+ GLOBAL_STATE_CODE();
QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
ctx = bdrv_get_aio_context(bs_entry->state.bs);
@@ -4365,6 +4494,8 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
BlockReopenQueue *queue;
int ret;
+ GLOBAL_STATE_CODE();
+
bdrv_subtree_drained_begin(bs);
if (ctx != qemu_get_aio_context()) {
aio_context_release(ctx);
@@ -4386,6 +4517,8 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
{
QDict *opts = qdict_new();
+ GLOBAL_STATE_CODE();
+
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
return bdrv_reopen(bs, opts, true, errp);
@@ -4420,6 +4553,8 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
QObject *value;
const char *str;
+ GLOBAL_STATE_CODE();
+
value = qdict_get(reopen_state->options, child_name);
if (value == NULL) {
return 0;
@@ -4518,6 +4653,7 @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
assert(reopen_state != NULL);
assert(reopen_state->bs->drv != NULL);
+ GLOBAL_STATE_CODE();
drv = reopen_state->bs->drv;
/* This function and each driver's bdrv_reopen_prepare() remove
@@ -4728,6 +4864,7 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
bs = reopen_state->bs;
drv = bs->drv;
assert(drv != NULL);
+ GLOBAL_STATE_CODE();
/* If there are any driver level actions to take */
if (drv->bdrv_reopen_commit) {
@@ -4769,6 +4906,7 @@ static void bdrv_reopen_abort(BDRVReopenState *reopen_state)
assert(reopen_state != NULL);
drv = reopen_state->bs->drv;
assert(drv != NULL);
+ GLOBAL_STATE_CODE();
if (drv->bdrv_reopen_abort) {
drv->bdrv_reopen_abort(reopen_state);
@@ -4781,6 +4919,7 @@ static void bdrv_close(BlockDriverState *bs)
BdrvAioNotifier *ban, *ban_next;
BdrvChild *child, *next;
+ GLOBAL_STATE_CODE();
assert(!bs->refcnt);
bdrv_drained_begin(bs); /* complete I/O */
@@ -4840,6 +4979,7 @@ static void bdrv_close(BlockDriverState *bs)
void bdrv_close_all(void)
{
assert(job_next(NULL) == NULL);
+ GLOBAL_STATE_CODE();
/* Drop references from requests still in flight, such as canceled block
* jobs whose AIO context has not been polled yet */
@@ -4958,7 +5098,7 @@ static void bdrv_remove_filter_or_cow_child_abort(void *opaque)
static void bdrv_remove_filter_or_cow_child_commit(void *opaque)
{
BdrvRemoveFilterOrCowChild *s = opaque;
-
+ GLOBAL_STATE_CODE();
bdrv_child_free(s->child);
}
@@ -5041,6 +5181,7 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
BdrvChild *c, *next;
assert(to != NULL);
+ GLOBAL_STATE_CODE();
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
assert(c->bs == from);
@@ -5091,6 +5232,7 @@ static int bdrv_replace_node_common(BlockDriverState *from,
BlockDriverState *to_cow_parent = NULL;
int ret;
+ GLOBAL_STATE_CODE();
assert(to != NULL);
if (detach_subchain) {
@@ -5154,11 +5296,15 @@ out:
int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp)
{
+ GLOBAL_STATE_CODE();
+
return bdrv_replace_node_common(from, to, true, false, errp);
}
int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
{
+ GLOBAL_STATE_CODE();
+
return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true,
errp);
}
@@ -5181,6 +5327,8 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
int ret;
Transaction *tran = tran_new();
+ GLOBAL_STATE_CODE();
+
assert(!bs_new->backing);
ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
@@ -5214,6 +5362,8 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
g_autoptr(GSList) refresh_list = NULL;
BlockDriverState *old_bs = child->bs;
+ GLOBAL_STATE_CODE();
+
bdrv_ref(old_bs);
bdrv_drained_begin(old_bs);
bdrv_drained_begin(new_bs);
@@ -5241,6 +5391,7 @@ static void bdrv_delete(BlockDriverState *bs)
{
assert(bdrv_op_blocker_is_empty(bs));
assert(!bs->refcnt);
+ GLOBAL_STATE_CODE();
/* remove from list, if necessary */
if (bs->node_name[0] != '\0') {
@@ -5285,6 +5436,8 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
node_name = qdict_get_try_str(options, "node-name");
+ GLOBAL_STATE_CODE();
+
new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
errp);
options = NULL; /* bdrv_new_open_driver() eats options */
@@ -5320,6 +5473,7 @@ fail:
int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix)
{
+ IO_CODE();
if (bs->drv == NULL) {
return -ENOMEDIUM;
}
@@ -5345,6 +5499,8 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
BlockDriver *drv = bs->drv;
int ret;
+ GLOBAL_STATE_CODE();
+
if (!drv) {
return -ENOMEDIUM;
}
@@ -5386,6 +5542,9 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
BlockDriverState *bs)
{
+
+ GLOBAL_STATE_CODE();
+
bs = bdrv_skip_filters(bs);
active = bdrv_skip_filters(active);
@@ -5403,6 +5562,8 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
/* Given a BDS, searches for the base layer. */
BlockDriverState *bdrv_find_base(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
+
return bdrv_find_overlay(bs, NULL);
}
@@ -5417,6 +5578,8 @@ bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
BlockDriverState *i;
BdrvChild *child;
+ GLOBAL_STATE_CODE();
+
for (i = bs; i != base; i = child_bs(child)) {
child = bdrv_filter_or_cow_child(i);
@@ -5443,6 +5606,8 @@ int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
BlockDriverState *i;
BdrvChild *child;
+ GLOBAL_STATE_CODE();
+
if (bdrv_is_backing_chain_frozen(bs, base, errp)) {
return -EPERM;
}
@@ -5477,6 +5642,8 @@ void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
BlockDriverState *i;
BdrvChild *child;
+ GLOBAL_STATE_CODE();
+
for (i = bs; i != base; i = child_bs(child)) {
child = bdrv_filter_or_cow_child(i);
if (child) {
@@ -5526,6 +5693,8 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
g_autoptr(GSList) updated_children = NULL;
GSList *p;
+ GLOBAL_STATE_CODE();
+
bdrv_ref(top);
bdrv_subtree_drained_begin(top);
@@ -5637,6 +5806,8 @@ static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
+ IO_CODE();
+
if (!drv) {
return -ENOMEDIUM;
}
@@ -5686,6 +5857,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
BlockDriverState *in_bs, Error **errp)
{
+ IO_CODE();
if (!drv->bdrv_measure) {
error_setg(errp, "Block driver '%s' does not support size measurement",
drv->format_name);
@@ -5701,6 +5873,7 @@ BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
int64_t bdrv_nb_sectors(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
+ IO_CODE();
if (!drv)
return -ENOMEDIUM;
@@ -5721,6 +5894,7 @@ int64_t bdrv_nb_sectors(BlockDriverState *bs)
int64_t bdrv_getlength(BlockDriverState *bs)
{
int64_t ret = bdrv_nb_sectors(bs);
+ IO_CODE();
if (ret < 0) {
return ret;
@@ -5735,12 +5909,14 @@ int64_t bdrv_getlength(BlockDriverState *bs)
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
int64_t nb_sectors = bdrv_nb_sectors(bs);
+ IO_CODE();
*nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
}
bool bdrv_is_sg(BlockDriverState *bs)
{
+ IO_CODE();
return bs->sg;
}
@@ -5750,6 +5926,7 @@ bool bdrv_is_sg(BlockDriverState *bs)
bool bdrv_supports_compressed_writes(BlockDriverState *bs)
{
BlockDriverState *filtered;
+ IO_CODE();
if (!bs->drv || !block_driver_can_compress(bs->drv)) {
return false;
@@ -5769,6 +5946,7 @@ bool bdrv_supports_compressed_writes(BlockDriverState *bs)
const char *bdrv_get_format_name(BlockDriverState *bs)
{
+ IO_CODE();
return bs->drv ? bs->drv->format_name : NULL;
}
@@ -5785,6 +5963,8 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
int i;
const char **formats = NULL;
+ GLOBAL_STATE_CODE();
+
QLIST_FOREACH(drv, &bdrv_drivers, list) {
if (drv->format_name) {
bool found = false;
@@ -5843,6 +6023,7 @@ BlockDriverState *bdrv_find_node(const char *node_name)
BlockDriverState *bs;
assert(node_name);
+ GLOBAL_STATE_CODE();
QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
if (!strcmp(node_name, bs->node_name)) {
@@ -5859,6 +6040,8 @@ BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
BlockDeviceInfoList *list;
BlockDriverState *bs;
+ GLOBAL_STATE_CODE();
+
list = NULL;
QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp);
@@ -5934,6 +6117,7 @@ static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
{
BlockPermission qapi_perm;
XDbgBlockGraphEdge *edge;
+ GLOBAL_STATE_CODE();
edge = g_new0(XDbgBlockGraphEdge, 1);
@@ -5964,6 +6148,8 @@ XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
BdrvChild *child;
XDbgBlockGraphConstructor *gr = xdbg_graph_new();
+ GLOBAL_STATE_CODE();
+
for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
char *allocated_name = NULL;
const char *name = blk_name(blk);
@@ -6007,6 +6193,8 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
BlockBackend *blk;
BlockDriverState *bs;
+ GLOBAL_STATE_CODE();
+
if (device) {
blk = blk_by_name(device);
@@ -6038,6 +6226,9 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
* return false. If either argument is NULL, return false. */
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
{
+
+ GLOBAL_STATE_CODE();
+
while (top && top != base) {
top = bdrv_filter_or_cow_bs(top);
}
@@ -6047,6 +6238,7 @@ bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
BlockDriverState *bdrv_next_node(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
if (!bs) {
return QTAILQ_FIRST(&graph_bdrv_states);
}
@@ -6055,6 +6247,7 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs)
BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
if (!bs) {
return QTAILQ_FIRST(&all_bdrv_states);
}
@@ -6063,6 +6256,7 @@ BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
+ IO_CODE();
return bs->node_name;
}
@@ -6070,6 +6264,7 @@ const char *bdrv_get_parent_name(const BlockDriverState *bs)
{
BdrvChild *c;
const char *name;
+ IO_CODE();
/* If multiple parents have a name, just pick the first one. */
QLIST_FOREACH(c, &bs->parents, next_parent) {
@@ -6087,6 +6282,7 @@ const char *bdrv_get_parent_name(const BlockDriverState *bs)
/* TODO check what callers really want: bs->node_name or blk_name() */
const char *bdrv_get_device_name(const BlockDriverState *bs)
{
+ IO_CODE();
return bdrv_get_parent_name(bs) ?: "";
}
@@ -6096,22 +6292,26 @@ const char *bdrv_get_device_name(const BlockDriverState *bs)
* absent, then this returns an empty (non-null) string. */
const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
{
+ IO_CODE();
return bdrv_get_parent_name(bs) ?: bs->node_name;
}
int bdrv_get_flags(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
return bs->open_flags;
}
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
return 1;
}
int bdrv_has_zero_init(BlockDriverState *bs)
{
BlockDriverState *filtered;
+ GLOBAL_STATE_CODE();
if (!bs->drv) {
return 0;
@@ -6137,6 +6337,7 @@ int bdrv_has_zero_init(BlockDriverState *bs)
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
{
+ IO_CODE();
if (!(bs->open_flags & BDRV_O_UNMAP)) {
return false;
}
@@ -6147,6 +6348,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
void bdrv_get_backing_filename(BlockDriverState *bs,
char *filename, int filename_size)
{
+ IO_CODE();
pstrcpy(filename, filename_size, bs->backing_file);
}
@@ -6154,6 +6356,7 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
int ret;
BlockDriver *drv = bs->drv;
+ IO_CODE();
/* if bs->drv == NULL, bs is closed, so there's nothing to do here */
if (!drv) {
return -ENOMEDIUM;
@@ -6182,6 +6385,7 @@ ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
Error **errp)
{
BlockDriver *drv = bs->drv;
+ IO_CODE();
if (drv && drv->bdrv_get_specific_info) {
return drv->bdrv_get_specific_info(bs, errp);
}
@@ -6191,6 +6395,7 @@ ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
+ IO_CODE();
if (!drv || !drv->bdrv_get_specific_stats) {
return NULL;
}
@@ -6199,6 +6404,7 @@ BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
{
+ IO_CODE();
if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
return;
}
@@ -6208,6 +6414,7 @@ void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
bs = bdrv_primary_bs(bs);
}
@@ -6223,6 +6430,7 @@ static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
const char *tag)
{
+ GLOBAL_STATE_CODE();
bs = bdrv_find_debug_node(bs);
if (bs) {
return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
@@ -6233,6 +6441,7 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
{
+ GLOBAL_STATE_CODE();
bs = bdrv_find_debug_node(bs);
if (bs) {
return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
@@ -6243,6 +6452,7 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
{
+ GLOBAL_STATE_CODE();
while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
bs = bdrv_primary_bs(bs);
}
@@ -6256,6 +6466,7 @@ int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
+ GLOBAL_STATE_CODE();
while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
bs = bdrv_primary_bs(bs);
}
@@ -6283,6 +6494,8 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
BlockDriverState *retval = NULL;
BlockDriverState *bs_below;
+ GLOBAL_STATE_CODE();
+
if (!bs || !bs->drv || !backing_file) {
return NULL;
}
@@ -6393,19 +6606,21 @@ void bdrv_init_with_whitelist(void)
bdrv_init();
}
-int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
+int bdrv_activate(BlockDriverState *bs, Error **errp)
{
BdrvChild *child, *parent;
Error *local_err = NULL;
int ret;
BdrvDirtyBitmap *bm;
+ GLOBAL_STATE_CODE();
+
if (!bs->drv) {
return -ENOMEDIUM;
}
QLIST_FOREACH(child, &bs->children, next) {
- bdrv_co_invalidate_cache(child->bs, &local_err);
+ bdrv_activate(child->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return -EINVAL;
@@ -6418,7 +6633,7 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
* Note that the required permissions of inactive images are always a
* subset of the permissions required after activating the image. This
* allows us to just get the permissions upfront without restricting
- * drv->bdrv_invalidate_cache().
+ * bdrv_co_invalidate_cache().
*
* It also means that in error cases, we don't have to try and revert to
* the old permissions (which is an operation that could fail, too). We can
@@ -6433,13 +6648,10 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
return ret;
}
- if (bs->drv->bdrv_co_invalidate_cache) {
- bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
- if (local_err) {
- bs->open_flags |= BDRV_O_INACTIVE;
- error_propagate(errp, local_err);
- return -EINVAL;
- }
+ ret = bdrv_invalidate_cache(bs, errp);
+ if (ret < 0) {
+ bs->open_flags |= BDRV_O_INACTIVE;
+ return ret;
}
FOR_EACH_DIRTY_BITMAP(bs, bm) {
@@ -6468,17 +6680,37 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
return 0;
}
-void bdrv_invalidate_cache_all(Error **errp)
+int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
+{
+ Error *local_err = NULL;
+ IO_CODE();
+
+ assert(!(bs->open_flags & BDRV_O_INACTIVE));
+
+ if (bs->drv->bdrv_co_invalidate_cache) {
+ bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+void bdrv_activate_all(Error **errp)
{
BlockDriverState *bs;
BdrvNextIterator it;
+ GLOBAL_STATE_CODE();
+
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
int ret;
aio_context_acquire(aio_context);
- ret = bdrv_invalidate_cache(bs, errp);
+ ret = bdrv_activate(bs, errp);
aio_context_release(aio_context);
if (ret < 0) {
bdrv_next_cleanup(&it);
@@ -6490,6 +6722,7 @@ void bdrv_invalidate_cache_all(Error **errp)
static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
{
BdrvChild *parent;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH(parent, &bs->parents, next_parent) {
if (parent->klass->parent_is_bds) {
@@ -6509,6 +6742,8 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
int ret;
uint64_t cumulative_perms, cumulative_shared_perms;
+ GLOBAL_STATE_CODE();
+
if (!bs->drv) {
return -ENOMEDIUM;
}
@@ -6572,6 +6807,8 @@ int bdrv_inactivate_all(void)
int ret = 0;
GSList *aio_ctxs = NULL, *ctx;
+ GLOBAL_STATE_CODE();
+
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
@@ -6615,6 +6852,7 @@ bool bdrv_is_inserted(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
BdrvChild *child;
+ IO_CODE();
if (!drv) {
return false;
@@ -6636,6 +6874,7 @@ bool bdrv_is_inserted(BlockDriverState *bs)
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
BlockDriver *drv = bs->drv;
+ IO_CODE();
if (drv && drv->bdrv_eject) {
drv->bdrv_eject(bs, eject_flag);
@@ -6649,7 +6888,7 @@ void bdrv_eject(BlockDriverState *bs, bool eject_flag)
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
BlockDriver *drv = bs->drv;
-
+ IO_CODE();
trace_bdrv_lock_medium(bs, locked);
if (drv && drv->bdrv_lock_medium) {
@@ -6660,6 +6899,7 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked)
/* Get a reference to bs */
void bdrv_ref(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
bs->refcnt++;
}
@@ -6668,6 +6908,7 @@ void bdrv_ref(BlockDriverState *bs)
* deleted. */
void bdrv_unref(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
if (!bs) {
return;
}
@@ -6685,6 +6926,7 @@ struct BdrvOpBlocker {
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
{
BdrvOpBlocker *blocker;
+ GLOBAL_STATE_CODE();
assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
if (!QLIST_EMPTY(&bs->op_blockers[op])) {
blocker = QLIST_FIRST(&bs->op_blockers[op]);
@@ -6699,6 +6941,7 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
{
BdrvOpBlocker *blocker;
+ GLOBAL_STATE_CODE();
assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
blocker = g_new0(BdrvOpBlocker, 1);
@@ -6709,6 +6952,7 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
{
BdrvOpBlocker *blocker, *next;
+ GLOBAL_STATE_CODE();
assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
if (blocker->reason == reason) {
@@ -6721,6 +6965,7 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
{
int i;
+ GLOBAL_STATE_CODE();
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
bdrv_op_block(bs, i, reason);
}
@@ -6729,6 +6974,7 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
{
int i;
+ GLOBAL_STATE_CODE();
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
bdrv_op_unblock(bs, i, reason);
}
@@ -6737,7 +6983,7 @@ void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
{
int i;
-
+ GLOBAL_STATE_CODE();
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
if (!QLIST_EMPTY(&bs->op_blockers[i])) {
return false;
@@ -6759,6 +7005,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
Error *local_err = NULL;
int ret = 0;
+ GLOBAL_STATE_CODE();
+
/* Find driver and parse its options */
drv = bdrv_find_format(fmt);
if (!drv) {
@@ -6936,6 +7184,7 @@ out:
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
+ IO_CODE();
return bs ? bs->aio_context : qemu_get_aio_context();
}
@@ -6944,6 +7193,7 @@ AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
Coroutine *self = qemu_coroutine_self();
AioContext *old_ctx = qemu_coroutine_get_aio_context(self);
AioContext *new_ctx;
+ IO_CODE();
/*
* Increase bs->in_flight to ensure that this operation is completed before
@@ -6958,6 +7208,7 @@ AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
{
+ IO_CODE();
aio_co_reschedule_self(old_ctx);
bdrv_dec_in_flight(bs);
}
@@ -6991,11 +7242,13 @@ void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
{
+ IO_CODE();
aio_co_enter(bdrv_get_aio_context(bs), co);
}
static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
{
+ GLOBAL_STATE_CODE();
QLIST_REMOVE(ban, list);
g_free(ban);
}
@@ -7005,6 +7258,7 @@ static void bdrv_detach_aio_context(BlockDriverState *bs)
BdrvAioNotifier *baf, *baf_tmp;
assert(!bs->walking_aio_notifiers);
+ GLOBAL_STATE_CODE();
bs->walking_aio_notifiers = true;
QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
if (baf->deleted) {
@@ -7032,6 +7286,7 @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
BdrvAioNotifier *ban, *ban_tmp;
+ GLOBAL_STATE_CODE();
if (bs->quiesce_counter) {
aio_disable_external(new_context);
@@ -7078,6 +7333,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
BdrvChild *child, *parent;
g_assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+ GLOBAL_STATE_CODE();
if (old_context == new_context) {
return;
@@ -7150,6 +7406,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx,
GSList **ignore, Error **errp)
{
+ GLOBAL_STATE_CODE();
if (g_slist_find(*ignore, c)) {
return true;
}
@@ -7175,6 +7432,7 @@ static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx,
bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
GSList **ignore, Error **errp)
{
+ GLOBAL_STATE_CODE();
if (g_slist_find(*ignore, c)) {
return true;
}
@@ -7193,6 +7451,8 @@ bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
return true;
}
+ GLOBAL_STATE_CODE();
+
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) {
return false;
@@ -7213,6 +7473,8 @@ int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
GSList *ignore;
bool ret;
+ GLOBAL_STATE_CODE();
+
ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL;
ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp);
g_slist_free(ignore);
@@ -7231,6 +7493,7 @@ int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
Error **errp)
{
+ GLOBAL_STATE_CODE();
return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp);
}
@@ -7244,6 +7507,7 @@ void bdrv_add_aio_context_notifier(BlockDriverState *bs,
.detach_aio_context = detach_aio_context,
.opaque = opaque
};
+ GLOBAL_STATE_CODE();
QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
}
@@ -7255,6 +7519,7 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
void *opaque)
{
BdrvAioNotifier *ban, *ban_next;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
if (ban->attached_aio_context == attached_aio_context &&
@@ -7279,6 +7544,7 @@ int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
bool force,
Error **errp)
{
+ GLOBAL_STATE_CODE();
if (!bs->drv) {
error_setg(errp, "Node is ejected");
return -ENOMEDIUM;
@@ -7309,6 +7575,8 @@ bool bdrv_recurse_can_replace(BlockDriverState *bs,
{
BlockDriverState *filtered;
+ GLOBAL_STATE_CODE();
+
if (!bs || !bs->drv) {
return false;
}
@@ -7349,6 +7617,8 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
AioContext *aio_context;
+ GLOBAL_STATE_CODE();
+
if (!to_replace_bs) {
error_setg(errp, "Failed to find node with node-name='%s'", node_name);
return NULL;
@@ -7478,6 +7748,7 @@ static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs)
* would result in exactly bs->backing. */
static bool bdrv_backing_overridden(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
if (bs->backing) {
return strcmp(bs->auto_backing_file,
bs->backing->bs->filename);
@@ -7510,6 +7781,8 @@ void bdrv_refresh_filename(BlockDriverState *bs)
bool generate_json_filename; /* Whether our default implementation should
fill exact_filename (false) or not (true) */
+ GLOBAL_STATE_CODE();
+
if (!drv) {
return;
}
@@ -7632,6 +7905,8 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp)
BlockDriver *drv = bs->drv;
BlockDriverState *child_bs;
+ GLOBAL_STATE_CODE();
+
if (!drv) {
error_setg(errp, "Node '%s' is ejected", bs->node_name);
return NULL;
@@ -7663,7 +7938,7 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp)
void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
Error **errp)
{
-
+ GLOBAL_STATE_CODE();
if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
error_setg(errp, "The node %s does not support adding a child",
bdrv_get_device_or_node_name(parent_bs));
@@ -7683,6 +7958,7 @@ void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
{
BdrvChild *tmp;
+ GLOBAL_STATE_CODE();
if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
error_setg(errp, "The node %s does not support removing a child",
bdrv_get_device_or_node_name(parent_bs));
@@ -7710,6 +7986,7 @@ int bdrv_make_empty(BdrvChild *c, Error **errp)
BlockDriver *drv = c->bs->drv;
int ret;
+ GLOBAL_STATE_CODE();
assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED));
if (!drv->bdrv_make_empty) {
@@ -7734,6 +8011,8 @@ int bdrv_make_empty(BdrvChild *c, Error **errp)
*/
BdrvChild *bdrv_cow_child(BlockDriverState *bs)
{
+ IO_CODE();
+
if (!bs || !bs->drv) {
return NULL;
}
@@ -7757,6 +8036,7 @@ BdrvChild *bdrv_cow_child(BlockDriverState *bs)
BdrvChild *bdrv_filter_child(BlockDriverState *bs)
{
BdrvChild *c;
+ IO_CODE();
if (!bs || !bs->drv) {
return NULL;
@@ -7788,6 +8068,7 @@ BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
{
BdrvChild *cow_child = bdrv_cow_child(bs);
BdrvChild *filter_child = bdrv_filter_child(bs);
+ IO_CODE();
/* Filter nodes cannot have COW backing files */
assert(!(cow_child && filter_child));
@@ -7808,6 +8089,7 @@ BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
BdrvChild *bdrv_primary_child(BlockDriverState *bs)
{
BdrvChild *c, *found = NULL;
+ IO_CODE();
QLIST_FOREACH(c, &bs->children, next) {
if (c->role & BDRV_CHILD_PRIMARY) {
@@ -7860,6 +8142,7 @@ static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs,
*/
BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
return bdrv_do_skip_filters(bs, true);
}
@@ -7869,6 +8152,7 @@ BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
*/
BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
{
+ IO_CODE();
return bdrv_do_skip_filters(bs, false);
}
@@ -7878,6 +8162,7 @@ BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
*/
BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
{
+ IO_CODE();
return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
}
@@ -7913,8 +8198,8 @@ static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs,
*/
bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
{
+ IO_CODE();
RCU_READ_LOCK_GUARD();
-
return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum);
}
@@ -7924,6 +8209,7 @@ bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
void bdrv_bsc_invalidate_range(BlockDriverState *bs,
int64_t offset, int64_t bytes)
{
+ IO_CODE();
RCU_READ_LOCK_GUARD();
if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) {
@@ -7938,6 +8224,7 @@ void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1);
BdrvBlockStatusCache *old_bsc;
+ IO_CODE();
*new_bsc = (BdrvBlockStatusCache) {
.valid = true,
diff --git a/block/amend.c b/block/amend.c
index 392df9ef83..f696a006e3 100644
--- a/block/amend.c
+++ b/block/amend.c
@@ -53,10 +53,31 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error **errp)
return ret;
}
+static int blockdev_amend_pre_run(BlockdevAmendJob *s, Error **errp)
+{
+ if (s->bs->drv->bdrv_amend_pre_run) {
+ return s->bs->drv->bdrv_amend_pre_run(s->bs, errp);
+ }
+
+ return 0;
+}
+
+static void blockdev_amend_free(Job *job)
+{
+ BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common);
+
+ if (s->bs->drv->bdrv_amend_clean) {
+ s->bs->drv->bdrv_amend_clean(s->bs);
+ }
+
+ bdrv_unref(s->bs);
+}
+
static const JobDriver blockdev_amend_job_driver = {
.instance_size = sizeof(BlockdevAmendJob),
.job_type = JOB_TYPE_AMEND,
.run = blockdev_amend_run,
+ .free = blockdev_amend_free,
};
void qmp_x_blockdev_amend(const char *job_id,
@@ -110,8 +131,15 @@ void qmp_x_blockdev_amend(const char *job_id,
return;
}
+ bdrv_ref(bs);
s->bs = bs,
s->opts = QAPI_CLONE(BlockdevAmendOptions, options),
s->force = has_force ? force : false;
+
+ if (blockdev_amend_pre_run(s, errp)) {
+ job_early_fail(&s->common);
+ return;
+ }
+
job_start(&s->common);
}
diff --git a/block/backup.c b/block/backup.c
index 21d5983779..5cfd0b999c 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -372,6 +372,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
assert(bs);
assert(target);
+ GLOBAL_STATE_CODE();
/* QMP interface protects us from these cases */
assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);
diff --git a/block/blkverify.c b/block/blkverify.c
index d1facf5ba9..53ba1c9195 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -16,6 +16,7 @@
#include "qemu/cutils.h"
#include "qemu/module.h"
#include "qemu/option.h"
+#include "qemu/memalign.h"
typedef struct {
BdrvChild *test_file;
diff --git a/block/block-backend.c b/block/block-backend.c
index 4ff6b4d785..e0e1aff4b1 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -79,6 +79,7 @@ struct BlockBackend {
bool allow_aio_context_change;
bool allow_write_beyond_eof;
+ /* Protected by BQL */
NotifierList remove_bs_notifiers, insert_bs_notifiers;
QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
@@ -111,12 +112,14 @@ static const AIOCBInfo block_backend_aiocb_info = {
static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
-/* All BlockBackends */
+/* All BlockBackends. Protected by BQL. */
static QTAILQ_HEAD(, BlockBackend) block_backends =
QTAILQ_HEAD_INITIALIZER(block_backends);
-/* All BlockBackends referenced by the monitor and which are iterated through by
- * blk_next() */
+/*
+ * All BlockBackends referenced by the monitor and which are iterated through by
+ * blk_next(). Protected by BQL.
+ */
static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
@@ -236,6 +239,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
void blk_set_force_allow_inactivate(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
blk->force_allow_inactivate = true;
}
@@ -354,6 +358,8 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
{
BlockBackend *blk;
+ GLOBAL_STATE_CODE();
+
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
blk->ctx = ctx;
@@ -391,6 +397,8 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
{
BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
+ GLOBAL_STATE_CODE();
+
if (blk_insert_bs(blk, bs, errp) < 0) {
blk_unref(blk);
return NULL;
@@ -419,6 +427,8 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
uint64_t perm = 0;
uint64_t shared = BLK_PERM_ALL;
+ GLOBAL_STATE_CODE();
+
/*
* blk_new_open() is mainly used in .bdrv_create implementations and the
* tools where sharing isn't a major concern because the BDS stays private
@@ -496,6 +506,7 @@ static void drive_info_del(DriveInfo *dinfo)
int blk_get_refcnt(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk ? blk->refcnt : 0;
}
@@ -506,6 +517,7 @@ int blk_get_refcnt(BlockBackend *blk)
void blk_ref(BlockBackend *blk)
{
assert(blk->refcnt > 0);
+ GLOBAL_STATE_CODE();
blk->refcnt++;
}
@@ -516,6 +528,7 @@ void blk_ref(BlockBackend *blk)
*/
void blk_unref(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
if (blk) {
assert(blk->refcnt > 0);
if (blk->refcnt > 1) {
@@ -536,6 +549,7 @@ void blk_unref(BlockBackend *blk)
*/
BlockBackend *blk_all_next(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk ? QTAILQ_NEXT(blk, link)
: QTAILQ_FIRST(&block_backends);
}
@@ -544,6 +558,8 @@ void blk_remove_all_bs(void)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
+
while ((blk = blk_all_next(blk)) != NULL) {
AioContext *ctx = blk_get_aio_context(blk);
@@ -567,6 +583,7 @@ void blk_remove_all_bs(void)
*/
BlockBackend *blk_next(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk ? QTAILQ_NEXT(blk, monitor_link)
: QTAILQ_FIRST(&monitor_block_backends);
}
@@ -633,6 +650,7 @@ static void bdrv_next_reset(BdrvNextIterator *it)
BlockDriverState *bdrv_first(BdrvNextIterator *it)
{
+ GLOBAL_STATE_CODE();
bdrv_next_reset(it);
return bdrv_next(it);
}
@@ -670,6 +688,7 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
{
assert(!blk->name);
assert(name && name[0]);
+ GLOBAL_STATE_CODE();
if (!id_wellformed(name)) {
error_setg(errp, "Invalid device name");
@@ -697,6 +716,8 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
*/
void monitor_remove_blk(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
+
if (!blk->name) {
return;
}
@@ -712,6 +733,7 @@ void monitor_remove_blk(BlockBackend *blk)
*/
const char *blk_name(const BlockBackend *blk)
{
+ IO_CODE();
return blk->name ?: "";
}
@@ -723,6 +745,7 @@ BlockBackend *blk_by_name(const char *name)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
assert(name);
while ((blk = blk_next(blk)) != NULL) {
if (!strcmp(name, blk->name)) {
@@ -737,12 +760,16 @@ BlockBackend *blk_by_name(const char *name)
*/
BlockDriverState *blk_bs(BlockBackend *blk)
{
+ IO_CODE();
return blk->root ? blk->root->bs : NULL;
}
static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
{
BdrvChild *child;
+
+ GLOBAL_STATE_CODE();
+
QLIST_FOREACH(child, &bs->parents, next_parent) {
if (child->klass == &child_root) {
return child->opaque;
@@ -757,6 +784,7 @@ static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
*/
bool bdrv_has_blk(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
return bdrv_first_blk(bs) != NULL;
}
@@ -767,6 +795,7 @@ bool bdrv_is_root_node(BlockDriverState *bs)
{
BdrvChild *c;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c->klass != &child_root) {
return false;
@@ -781,6 +810,7 @@ bool bdrv_is_root_node(BlockDriverState *bs)
*/
DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->legacy_dinfo;
}
@@ -792,6 +822,7 @@ DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
{
assert(!blk->legacy_dinfo);
+ GLOBAL_STATE_CODE();
return blk->legacy_dinfo = dinfo;
}
@@ -802,6 +833,7 @@ DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
while ((blk = blk_next(blk)) != NULL) {
if (blk->legacy_dinfo == dinfo) {
@@ -816,6 +848,7 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
*/
BlockBackendPublic *blk_get_public(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return &blk->public;
}
@@ -824,6 +857,7 @@ BlockBackendPublic *blk_get_public(BlockBackend *blk)
*/
BlockBackend *blk_by_public(BlockBackendPublic *public)
{
+ GLOBAL_STATE_CODE();
return container_of(public, BlockBackend, public);
}
@@ -835,6 +869,8 @@ void blk_remove_bs(BlockBackend *blk)
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
BdrvChild *root;
+ GLOBAL_STATE_CODE();
+
notifier_list_notify(&blk->remove_bs_notifiers, blk);
if (tgm->throttle_state) {
BlockDriverState *bs = blk_bs(blk);
@@ -869,6 +905,7 @@ void blk_remove_bs(BlockBackend *blk)
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
+ GLOBAL_STATE_CODE();
bdrv_ref(bs);
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -892,6 +929,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
*/
int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
{
+ GLOBAL_STATE_CODE();
return bdrv_replace_child_bs(blk->root, new_bs, errp);
}
@@ -902,6 +940,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
Error **errp)
{
int ret;
+ GLOBAL_STATE_CODE();
if (blk->root && !blk->disable_perm) {
ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
@@ -918,6 +957,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
{
+ GLOBAL_STATE_CODE();
*perm = blk->perm;
*shared_perm = blk->shared_perm;
}
@@ -928,6 +968,7 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
*/
int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
{
+ GLOBAL_STATE_CODE();
if (blk->dev) {
return -EBUSY;
}
@@ -953,6 +994,7 @@ int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
{
assert(blk->dev == dev);
+ GLOBAL_STATE_CODE();
blk->dev = NULL;
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
@@ -966,6 +1008,7 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
*/
DeviceState *blk_get_attached_dev(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->dev;
}
@@ -974,6 +1017,7 @@ DeviceState *blk_get_attached_dev(BlockBackend *blk)
char *blk_get_attached_dev_id(BlockBackend *blk)
{
DeviceState *dev = blk->dev;
+ IO_CODE();
if (!dev) {
return g_strdup("");
@@ -994,6 +1038,8 @@ BlockBackend *blk_by_dev(void *dev)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
+
assert(dev != NULL);
while ((blk = blk_all_next(blk)) != NULL) {
if (blk->dev == dev) {
@@ -1011,6 +1057,7 @@ BlockBackend *blk_by_dev(void *dev)
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
void *opaque)
{
+ GLOBAL_STATE_CODE();
blk->dev_ops = ops;
blk->dev_opaque = opaque;
@@ -1032,6 +1079,7 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
*/
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
+ GLOBAL_STATE_CODE();
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
bool tray_was_open, tray_is_open;
Error *local_err = NULL;
@@ -1064,6 +1112,7 @@ static void blk_root_change_media(BdrvChild *child, bool load)
*/
bool blk_dev_has_removable_media(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
}
@@ -1072,6 +1121,7 @@ bool blk_dev_has_removable_media(BlockBackend *blk)
*/
bool blk_dev_has_tray(BlockBackend *blk)
{
+ IO_CODE();
return blk->dev_ops && blk->dev_ops->is_tray_open;
}
@@ -1081,6 +1131,7 @@ bool blk_dev_has_tray(BlockBackend *blk)
*/
void blk_dev_eject_request(BlockBackend *blk, bool force)
{
+ GLOBAL_STATE_CODE();
if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
}
@@ -1091,6 +1142,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force)
*/
bool blk_dev_is_tray_open(BlockBackend *blk)
{
+ IO_CODE();
if (blk_dev_has_tray(blk)) {
return blk->dev_ops->is_tray_open(blk->dev_opaque);
}
@@ -1103,6 +1155,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk)
*/
bool blk_dev_is_medium_locked(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
return blk->dev_ops->is_medium_locked(blk->dev_opaque);
}
@@ -1123,6 +1176,7 @@ static void blk_root_resize(BdrvChild *child)
void blk_iostatus_enable(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
blk->iostatus_enabled = true;
blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
@@ -1131,6 +1185,7 @@ void blk_iostatus_enable(BlockBackend *blk)
* enables it _and_ the VM is configured to stop on errors */
bool blk_iostatus_is_enabled(const BlockBackend *blk)
{
+ IO_CODE();
return (blk->iostatus_enabled &&
(blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
blk->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
@@ -1139,16 +1194,19 @@ bool blk_iostatus_is_enabled(const BlockBackend *blk)
BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->iostatus;
}
void blk_iostatus_disable(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
blk->iostatus_enabled = false;
}
void blk_iostatus_reset(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
if (blk_iostatus_is_enabled(blk)) {
blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
@@ -1156,6 +1214,7 @@ void blk_iostatus_reset(BlockBackend *blk)
void blk_iostatus_set_err(BlockBackend *blk, int error)
{
+ IO_CODE();
assert(blk_iostatus_is_enabled(blk));
if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
@@ -1165,16 +1224,19 @@ void blk_iostatus_set_err(BlockBackend *blk, int error)
void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
{
+ IO_CODE();
blk->allow_write_beyond_eof = allow;
}
void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
{
+ IO_CODE();
blk->allow_aio_context_change = allow;
}
void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
{
+ IO_CODE();
blk->disable_request_queuing = disable;
}
@@ -1228,6 +1290,7 @@ blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
{
int ret;
BlockDriverState *bs;
+ IO_CODE();
blk_wait_while_drained(blk);
@@ -1258,6 +1321,7 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
BdrvRequestFlags flags)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_co_do_preadv(blk, offset, bytes, qiov, flags);
@@ -1274,6 +1338,7 @@ blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
{
int ret;
BlockDriverState *bs;
+ IO_CODE();
blk_wait_while_drained(blk);
@@ -1309,6 +1374,7 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
BdrvRequestFlags flags)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
@@ -1321,6 +1387,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
+ IO_OR_GS_CODE();
return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
}
@@ -1349,22 +1416,26 @@ typedef struct BlkRwCo {
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags)
{
+ IO_OR_GS_CODE();
return blk_pwritev_part(blk, offset, bytes, NULL, 0,
flags | BDRV_REQ_ZERO_WRITE);
}
int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
{
+ GLOBAL_STATE_CODE();
return bdrv_make_zero(blk->root, flags);
}
void blk_inc_in_flight(BlockBackend *blk)
{
+ IO_CODE();
qatomic_inc(&blk->in_flight);
}
void blk_dec_in_flight(BlockBackend *blk)
{
+ IO_CODE();
qatomic_dec(&blk->in_flight);
aio_wait_kick();
}
@@ -1383,6 +1454,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
void *opaque, int ret)
{
struct BlockBackendAIOCB *acb;
+ IO_CODE();
blk_inc_in_flight(blk);
acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
@@ -1490,6 +1562,7 @@ BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry,
flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}
@@ -1498,6 +1571,7 @@ int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes)
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_do_preadv(blk, offset, bytes, &qiov, 0);
@@ -1511,6 +1585,7 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
ret = blk_pwritev_part(blk, offset, bytes, &qiov, 0, flags);
@@ -1519,6 +1594,7 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
int64_t blk_getlength(BlockBackend *blk)
{
+ IO_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -1528,6 +1604,7 @@ int64_t blk_getlength(BlockBackend *blk)
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
{
+ IO_CODE();
if (!blk_bs(blk)) {
*nb_sectors_ptr = 0;
} else {
@@ -1537,6 +1614,7 @@ void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
int64_t blk_nb_sectors(BlockBackend *blk)
{
+ IO_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -1548,6 +1626,7 @@ BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
assert((uint64_t)qiov->size <= INT64_MAX);
return blk_aio_prwv(blk, offset, qiov->size, qiov,
blk_aio_read_entry, flags, cb, opaque);
@@ -1557,6 +1636,7 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
assert((uint64_t)qiov->size <= INT64_MAX);
return blk_aio_prwv(blk, offset, qiov->size, qiov,
blk_aio_write_entry, flags, cb, opaque);
@@ -1564,11 +1644,13 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
void blk_aio_cancel(BlockAIOCB *acb)
{
+ GLOBAL_STATE_CODE();
bdrv_aio_cancel(acb);
}
void blk_aio_cancel_async(BlockAIOCB *acb)
{
+ IO_CODE();
bdrv_aio_cancel_async(acb);
}
@@ -1576,6 +1658,8 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
int coroutine_fn
blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
+ IO_CODE();
+
blk_wait_while_drained(blk);
if (!blk_is_available(blk)) {
@@ -1588,6 +1672,7 @@ blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_do_ioctl(blk, req, buf);
@@ -1609,6 +1694,7 @@ static void blk_aio_ioctl_entry(void *opaque)
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
}
@@ -1617,6 +1703,7 @@ int coroutine_fn
blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
{
int ret;
+ IO_CODE();
blk_wait_while_drained(blk);
@@ -1641,6 +1728,7 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
int64_t offset, int64_t bytes,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
cb, opaque);
}
@@ -1649,6 +1737,7 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
int64_t bytes)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_co_do_pdiscard(blk, offset, bytes);
@@ -1660,6 +1749,7 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_do_pdiscard(blk, offset, bytes);
@@ -1672,6 +1762,7 @@ int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
int coroutine_fn blk_co_do_flush(BlockBackend *blk)
{
blk_wait_while_drained(blk);
+ IO_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
@@ -1692,12 +1783,14 @@ static void blk_aio_flush_entry(void *opaque)
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
}
int coroutine_fn blk_co_flush(BlockBackend *blk)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_co_do_flush(blk);
@@ -1720,6 +1813,7 @@ int blk_flush(BlockBackend *blk)
void blk_drain(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_ref(bs);
@@ -1740,6 +1834,8 @@ void blk_drain_all(void)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
+
bdrv_drain_all_begin();
while ((blk = blk_all_next(blk)) != NULL) {
@@ -1759,12 +1855,14 @@ void blk_drain_all(void)
void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
BlockdevOnError on_write_error)
{
+ GLOBAL_STATE_CODE();
blk->on_read_error = on_read_error;
blk->on_write_error = on_write_error;
}
BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
{
+ IO_CODE();
return is_read ? blk->on_read_error : blk->on_write_error;
}
@@ -1772,6 +1870,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
int error)
{
BlockdevOnError on_err = blk_get_on_error(blk, is_read);
+ IO_CODE();
switch (on_err) {
case BLOCKDEV_ON_ERROR_ENOSPC:
@@ -1811,6 +1910,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool is_read, int error)
{
assert(error >= 0);
+ IO_CODE();
if (action == BLOCK_ERROR_ACTION_STOP) {
/* First set the iostatus, so that "info block" returns an iostatus
@@ -1842,6 +1942,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool blk_supports_write_perm(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
return !bdrv_is_read_only(bs);
@@ -1856,12 +1957,14 @@ bool blk_supports_write_perm(BlockBackend *blk)
*/
bool blk_is_writable(BlockBackend *blk)
{
+ IO_CODE();
return blk->perm & BLK_PERM_WRITE;
}
bool blk_is_sg(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (!bs) {
return false;
@@ -1872,41 +1975,47 @@ bool blk_is_sg(BlockBackend *blk)
bool blk_enable_write_cache(BlockBackend *blk)
{
+ IO_CODE();
return blk->enable_write_cache;
}
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
+ GLOBAL_STATE_CODE();
blk->enable_write_cache = wce;
}
-void blk_invalidate_cache(BlockBackend *blk, Error **errp)
+void blk_activate(BlockBackend *blk, Error **errp)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (!bs) {
error_setg(errp, "Device '%s' has no medium", blk->name);
return;
}
- bdrv_invalidate_cache(bs, errp);
+ bdrv_activate(bs, errp);
}
bool blk_is_inserted(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
return bs && bdrv_is_inserted(bs);
}
bool blk_is_available(BlockBackend *blk)
{
+ IO_CODE();
return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}
void blk_lock_medium(BlockBackend *blk, bool locked)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
if (bs) {
bdrv_lock_medium(bs, locked);
@@ -1917,6 +2026,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag)
{
BlockDriverState *bs = blk_bs(blk);
char *id;
+ IO_CODE();
if (bs) {
bdrv_eject(bs, eject_flag);
@@ -1933,6 +2043,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag)
int blk_get_flags(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
return bdrv_get_flags(bs);
@@ -1945,6 +2056,7 @@ int blk_get_flags(BlockBackend *blk)
uint32_t blk_get_request_alignment(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
}
@@ -1953,6 +2065,7 @@ uint64_t blk_get_max_hw_transfer(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
uint64_t max = INT_MAX;
+ IO_CODE();
if (bs) {
max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer);
@@ -1966,6 +2079,7 @@ uint32_t blk_get_max_transfer(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
uint32_t max = INT_MAX;
+ IO_CODE();
if (bs) {
max = MIN_NON_ZERO(max, bs->bl.max_transfer);
@@ -1975,33 +2089,39 @@ uint32_t blk_get_max_transfer(BlockBackend *blk)
int blk_get_max_hw_iov(BlockBackend *blk)
{
+ IO_CODE();
return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov,
blk->root->bs->bl.max_iov);
}
int blk_get_max_iov(BlockBackend *blk)
{
+ IO_CODE();
return blk->root->bs->bl.max_iov;
}
void blk_set_guest_block_size(BlockBackend *blk, int align)
{
+ IO_CODE();
blk->guest_block_size = align;
}
void *blk_try_blockalign(BlockBackend *blk, size_t size)
{
+ IO_CODE();
return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
}
void *blk_blockalign(BlockBackend *blk, size_t size)
{
+ IO_CODE();
return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (!bs) {
return false;
@@ -2013,6 +2133,7 @@ bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_op_unblock(bs, op, reason);
@@ -2022,6 +2143,7 @@ void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
void blk_op_block_all(BlockBackend *blk, Error *reason)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_op_block_all(bs, reason);
@@ -2031,6 +2153,7 @@ void blk_op_block_all(BlockBackend *blk, Error *reason)
void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_op_unblock_all(bs, reason);
@@ -2040,6 +2163,7 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
AioContext *blk_get_aio_context(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
if (bs) {
AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
@@ -2090,6 +2214,7 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
Error **errp)
{
+ GLOBAL_STATE_CODE();
return blk_do_set_aio_context(blk, new_context, true, errp);
}
@@ -2126,6 +2251,7 @@ void blk_add_aio_context_notifier(BlockBackend *blk,
{
BlockBackendAioNotifier *notifier;
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
notifier = g_new(BlockBackendAioNotifier, 1);
notifier->attached_aio_context = attached_aio_context;
@@ -2148,6 +2274,8 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
BlockBackendAioNotifier *notifier;
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
+
if (bs) {
bdrv_remove_aio_context_notifier(bs, attached_aio_context,
detach_aio_context, opaque);
@@ -2168,17 +2296,20 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
{
+ GLOBAL_STATE_CODE();
notifier_list_add(&blk->remove_bs_notifiers, notify);
}
void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
{
+ GLOBAL_STATE_CODE();
notifier_list_add(&blk->insert_bs_notifiers, notify);
}
void blk_io_plug(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
if (bs) {
bdrv_io_plug(bs);
@@ -2188,6 +2319,7 @@ void blk_io_plug(BlockBackend *blk)
void blk_io_unplug(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
if (bs) {
bdrv_io_unplug(bs);
@@ -2196,18 +2328,21 @@ void blk_io_unplug(BlockBackend *blk)
BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
+ IO_CODE();
return &blk->stats;
}
void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}
int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags)
{
+ IO_OR_GS_CODE();
return blk_co_pwritev(blk, offset, bytes, NULL,
flags | BDRV_REQ_ZERO_WRITE);
}
@@ -2216,6 +2351,7 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
int64_t bytes)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
return blk_pwritev_part(blk, offset, bytes, &qiov, 0,
BDRV_REQ_WRITE_COMPRESSED);
}
@@ -2223,6 +2359,7 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
{
+ IO_OR_GS_CODE();
if (!blk_is_available(blk)) {
error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
@@ -2235,6 +2372,7 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size)
{
int ret;
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
@@ -2254,6 +2392,7 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
{
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -2263,6 +2402,7 @@ int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
{
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -2272,6 +2412,7 @@ int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
{
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -2285,6 +2426,7 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
*/
void blk_update_root_state(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
assert(blk->root);
blk->root_state.open_flags = blk->root->bs->open_flags;
@@ -2297,6 +2439,7 @@ void blk_update_root_state(BlockBackend *blk)
*/
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->root_state.detect_zeroes;
}
@@ -2306,17 +2449,20 @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
*/
int blk_get_open_flags_from_root_state(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->root_state.open_flags;
}
BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return &blk->root_state;
}
int blk_commit_all(void)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
while ((blk = blk_all_next(blk)) != NULL) {
AioContext *aio_context = blk_get_aio_context(blk);
@@ -2341,6 +2487,7 @@ int blk_commit_all(void)
/* throttling disk I/O limits */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
{
+ GLOBAL_STATE_CODE();
throttle_group_config(&blk->public.throttle_group_member, cfg);
}
@@ -2349,6 +2496,7 @@ void blk_io_limits_disable(BlockBackend *blk)
BlockDriverState *bs = blk_bs(blk);
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
assert(tgm->throttle_state);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_ref(bs);
bdrv_drained_begin(bs);
@@ -2364,12 +2512,14 @@ void blk_io_limits_disable(BlockBackend *blk)
void blk_io_limits_enable(BlockBackend *blk, const char *group)
{
assert(!blk->public.throttle_group_member.throttle_state);
+ GLOBAL_STATE_CODE();
throttle_group_register_tgm(&blk->public.throttle_group_member,
group, blk_get_aio_context(blk));
}
void blk_io_limits_update_group(BlockBackend *blk, const char *group)
{
+ GLOBAL_STATE_CODE();
/* this BB is not part of any group */
if (!blk->public.throttle_group_member.throttle_state) {
return;
@@ -2437,11 +2587,13 @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
void blk_register_buf(BlockBackend *blk, void *host, size_t size)
{
+ GLOBAL_STATE_CODE();
bdrv_register_buf(blk_bs(blk), host, size);
}
void blk_unregister_buf(BlockBackend *blk, void *host)
{
+ GLOBAL_STATE_CODE();
bdrv_unregister_buf(blk_bs(blk), host);
}
@@ -2451,6 +2603,8 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
BdrvRequestFlags write_flags)
{
int r;
+ IO_CODE();
+
r = blk_check_byte_request(blk_in, off_in, bytes);
if (r) {
return r;
@@ -2466,11 +2620,13 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
const BdrvChild *blk_root(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->root;
}
int blk_make_empty(BlockBackend *blk, Error **errp)
{
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
diff --git a/block/block-copy.c b/block/block-copy.c
index ce116318b5..ec46775ea5 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -17,11 +17,13 @@
#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
+#include "block/reqlist.h"
#include "sysemu/block-backend.h"
#include "qemu/units.h"
#include "qemu/coroutine.h"
#include "block/aio_task.h"
#include "qemu/error-report.h"
+#include "qemu/memalign.h"
#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
@@ -83,7 +85,6 @@ typedef struct BlockCopyTask {
*/
BlockCopyState *s;
BlockCopyCallState *call_state;
- int64_t offset;
/*
* @method can also be set again in the while loop of
* block_copy_dirty_clusters(), but it is never accessed concurrently
@@ -94,21 +95,17 @@ typedef struct BlockCopyTask {
BlockCopyMethod method;
/*
- * Fields whose state changes throughout the execution
- * Protected by lock in BlockCopyState.
- */
- CoQueue wait_queue; /* coroutines blocked on this task */
- /*
- * Only protect the case of parallel read while updating @bytes
- * value in block_copy_task_shrink().
+ * Generally, req is protected by the lock in BlockCopyState. Still,
+ * req.offset is only set on task creation, so it may be read concurrently
+ * afterwards. req.bytes is changed at most once, and only needs protection
+ * against a parallel read while it is updated in block_copy_task_shrink().
*/
- int64_t bytes;
- QLIST_ENTRY(BlockCopyTask) list;
+ BlockReq req;
} BlockCopyTask;
static int64_t task_end(BlockCopyTask *task)
{
- return task->offset + task->bytes;
+ return task->req.offset + task->req.bytes;
}
typedef struct BlockCopyState {
@@ -136,7 +133,7 @@ typedef struct BlockCopyState {
CoMutex lock;
int64_t in_flight_bytes;
BlockCopyMethod method;
- QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
+ BlockReqList reqs;
QLIST_HEAD(, BlockCopyCallState) calls;
/*
* skip_unallocated:
@@ -161,42 +158,6 @@ typedef struct BlockCopyState {
} BlockCopyState;
/* Called with lock held */
-static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
- int64_t offset, int64_t bytes)
-{
- BlockCopyTask *t;
-
- QLIST_FOREACH(t, &s->tasks, list) {
- if (offset + bytes > t->offset && offset < t->offset + t->bytes) {
- return t;
- }
- }
-
- return NULL;
-}
-
-/*
- * If there are no intersecting tasks return false. Otherwise, wait for the
- * first found intersecting tasks to finish and return true.
- *
- * Called with lock held. May temporary release the lock.
- * Return value of 0 proves that lock was NOT released.
- */
-static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
- int64_t bytes)
-{
- BlockCopyTask *task = find_conflicting_task(s, offset, bytes);
-
- if (!task) {
- return false;
- }
-
- qemu_co_queue_wait(&task->wait_queue, &s->lock);
-
- return true;
-}
-
-/* Called with lock held */
static int64_t block_copy_chunk_size(BlockCopyState *s)
{
switch (s->method) {
@@ -239,7 +200,7 @@ block_copy_task_create(BlockCopyState *s, BlockCopyCallState *call_state,
bytes = QEMU_ALIGN_UP(bytes, s->cluster_size);
/* region is dirty, so no existent tasks possible in it */
- assert(!find_conflicting_task(s, offset, bytes));
+ assert(!reqlist_find_conflict(&s->reqs, offset, bytes));
bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
s->in_flight_bytes += bytes;
@@ -249,12 +210,9 @@ block_copy_task_create(BlockCopyState *s, BlockCopyCallState *call_state,
.task.func = block_copy_task_entry,
.s = s,
.call_state = call_state,
- .offset = offset,
- .bytes = bytes,
.method = s->method,
};
- qemu_co_queue_init(&task->wait_queue);
- QLIST_INSERT_HEAD(&s->tasks, task, list);
+ reqlist_init_req(&s->reqs, &task->req, offset, bytes);
return task;
}
@@ -270,34 +228,34 @@ static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task,
int64_t new_bytes)
{
QEMU_LOCK_GUARD(&task->s->lock);
- if (new_bytes == task->bytes) {
+ if (new_bytes == task->req.bytes) {
return;
}
- assert(new_bytes > 0 && new_bytes < task->bytes);
+ assert(new_bytes > 0 && new_bytes < task->req.bytes);
- task->s->in_flight_bytes -= task->bytes - new_bytes;
+ task->s->in_flight_bytes -= task->req.bytes - new_bytes;
bdrv_set_dirty_bitmap(task->s->copy_bitmap,
- task->offset + new_bytes, task->bytes - new_bytes);
+ task->req.offset + new_bytes,
+ task->req.bytes - new_bytes);
- task->bytes = new_bytes;
- qemu_co_queue_restart_all(&task->wait_queue);
+ reqlist_shrink_req(&task->req, new_bytes);
}
static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret)
{
QEMU_LOCK_GUARD(&task->s->lock);
- task->s->in_flight_bytes -= task->bytes;
+ task->s->in_flight_bytes -= task->req.bytes;
if (ret < 0) {
- bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes);
+ bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->req.offset,
+ task->req.bytes);
}
- QLIST_REMOVE(task, list);
if (task->s->progress) {
progress_set_remaining(task->s->progress,
bdrv_get_dirty_count(task->s->copy_bitmap) +
task->s->in_flight_bytes);
}
- qemu_co_queue_restart_all(&task->wait_queue);
+ reqlist_remove_req(&task->req);
}
void block_copy_state_free(BlockCopyState *s)
@@ -384,8 +342,10 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
}
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ const BdrvDirtyBitmap *bitmap,
Error **errp)
{
+ ERRP_GUARD();
BlockCopyState *s;
int64_t cluster_size;
BdrvDirtyBitmap *copy_bitmap;
@@ -402,6 +362,17 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
return NULL;
}
bdrv_disable_dirty_bitmap(copy_bitmap);
+ if (bitmap) {
+ if (!bdrv_merge_dirty_bitmap(copy_bitmap, bitmap, NULL, errp)) {
+ error_prepend(errp, "Failed to merge bitmap '%s' to internal "
+ "copy-bitmap: ", bdrv_dirty_bitmap_name(bitmap));
+ bdrv_release_dirty_bitmap(copy_bitmap);
+ return NULL;
+ }
+ } else {
+ bdrv_set_dirty_bitmap(copy_bitmap, 0,
+ bdrv_dirty_bitmap_size(copy_bitmap));
+ }
/*
* If source is in backing chain of target assume that target is going to be
@@ -437,7 +408,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
ratelimit_init(&s->rate_limit);
qemu_co_mutex_init(&s->lock);
- QLIST_INIT(&s->tasks);
+ QLIST_INIT(&s->reqs);
QLIST_INIT(&s->calls);
return s;
@@ -470,7 +441,7 @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
aio_task_pool_wait_slot(pool);
if (aio_task_pool_status(pool) < 0) {
- co_put_to_shres(task->s->mem, task->bytes);
+ co_put_to_shres(task->s->mem, task->req.bytes);
block_copy_task_end(task, -ECANCELED);
g_free(task);
return -ECANCELED;
@@ -583,7 +554,8 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
BlockCopyMethod method = t->method;
int ret;
- ret = block_copy_do_copy(s, t->offset, t->bytes, &method, &error_is_read);
+ ret = block_copy_do_copy(s, t->req.offset, t->req.bytes, &method,
+ &error_is_read);
WITH_QEMU_LOCK_GUARD(&s->lock) {
if (s->method == t->method) {
@@ -596,10 +568,10 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
t->call_state->error_is_read = error_is_read;
}
} else if (s->progress) {
- progress_work_done(s->progress, t->bytes);
+ progress_work_done(s->progress, t->req.bytes);
}
}
- co_put_to_shres(s->mem, t->bytes);
+ co_put_to_shres(s->mem, t->req.bytes);
block_copy_task_end(t, ret);
return ret;
@@ -679,6 +651,18 @@ static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
}
}
+void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes)
+{
+ QEMU_LOCK_GUARD(&s->lock);
+
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
+ if (s->progress) {
+ progress_set_remaining(s->progress,
+ bdrv_get_dirty_count(s->copy_bitmap) +
+ s->in_flight_bytes);
+ }
+}
+
/*
* Reset bits in copy_bitmap starting at offset if they represent unallocated
* data in the image. May reset subsequent contiguous bits.
@@ -699,14 +683,7 @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
bytes = clusters * s->cluster_size;
if (!ret) {
- qemu_co_mutex_lock(&s->lock);
- bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
- if (s->progress) {
- progress_set_remaining(s->progress,
- bdrv_get_dirty_count(s->copy_bitmap) +
- s->in_flight_bytes);
- }
- qemu_co_mutex_unlock(&s->lock);
+ block_copy_reset(s, offset, bytes);
}
*count = bytes;
@@ -753,22 +730,22 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
trace_block_copy_skip_range(s, offset, bytes);
break;
}
- if (task->offset > offset) {
- trace_block_copy_skip_range(s, offset, task->offset - offset);
+ if (task->req.offset > offset) {
+ trace_block_copy_skip_range(s, offset, task->req.offset - offset);
}
found_dirty = true;
- ret = block_copy_block_status(s, task->offset, task->bytes,
+ ret = block_copy_block_status(s, task->req.offset, task->req.bytes,
&status_bytes);
assert(ret >= 0); /* never fail */
- if (status_bytes < task->bytes) {
+ if (status_bytes < task->req.bytes) {
block_copy_task_shrink(task, status_bytes);
}
if (qatomic_read(&s->skip_unallocated) &&
!(ret & BDRV_BLOCK_ALLOCATED)) {
block_copy_task_end(task, 0);
- trace_block_copy_skip_range(s, task->offset, task->bytes);
+ trace_block_copy_skip_range(s, task->req.offset, task->req.bytes);
offset = task_end(task);
bytes = end - offset;
g_free(task);
@@ -789,11 +766,11 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
}
}
- ratelimit_calculate_delay(&s->rate_limit, task->bytes);
+ ratelimit_calculate_delay(&s->rate_limit, task->req.bytes);
- trace_block_copy_process(s, task->offset);
+ trace_block_copy_process(s, task->req.offset);
- co_get_from_shres(s->mem, task->bytes);
+ co_get_from_shres(s->mem, task->req.bytes);
offset = task_end(task);
bytes = end - offset;
@@ -861,8 +838,8 @@ static int coroutine_fn block_copy_common(BlockCopyCallState *call_state)
* Check that there is no task we still need to
* wait to complete
*/
- ret = block_copy_wait_one(s, call_state->offset,
- call_state->bytes);
+ ret = reqlist_wait_one(&s->reqs, call_state->offset,
+ call_state->bytes, &s->lock);
if (ret == 0) {
/*
* No pending tasks, but check again the bitmap in this
@@ -870,7 +847,7 @@ static int coroutine_fn block_copy_common(BlockCopyCallState *call_state)
* between this and the critical section in
* block_copy_dirty_clusters().
*
- * block_copy_wait_one return value 0 also means that it
+ * reqlist_wait_one return value 0 also means that it
* didn't release the lock. So, we are still in the same
* critical section, not interrupted by any concurrent
* access to state.
diff --git a/block/commit.c b/block/commit.c
index b1fc7b908b..851d1c557a 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -20,6 +20,7 @@
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
+#include "qemu/memalign.h"
#include "sysemu/block-backend.h"
enum {
@@ -253,6 +254,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
uint64_t base_perms, iter_shared_perms;
int ret;
+ GLOBAL_STATE_CODE();
+
assert(top != bs);
if (bdrv_skip_filters(top) == bdrv_skip_filters(base)) {
error_setg(errp, "Invalid files for merge: top and base are the same");
@@ -432,6 +435,8 @@ int bdrv_commit(BlockDriverState *bs)
QEMU_AUTO_VFREE uint8_t *buf = NULL;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
if (!drv)
return -ENOMEDIUM;
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index c30a5ff8de..a8a06fdc09 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -33,10 +33,37 @@
#include "block/block-copy.h"
#include "block/copy-before-write.h"
+#include "block/reqlist.h"
+
+#include "qapi/qapi-visit-block-core.h"
typedef struct BDRVCopyBeforeWriteState {
BlockCopyState *bcs;
BdrvChild *target;
+
+ /*
+ * @lock: protects access to @access_bitmap, @done_bitmap and
+ * @frozen_read_reqs
+ */
+ CoMutex lock;
+
+ /*
+ * @access_bitmap: represents areas allowed for reading by fleecing user.
+ * Reading from non-dirty areas leads to -EACCES.
+ */
+ BdrvDirtyBitmap *access_bitmap;
+
+ /*
+ * @done_bitmap: represents areas that were successfully copied to @target by
+ * copy-before-write operations.
+ */
+ BdrvDirtyBitmap *done_bitmap;
+
+ /*
+ * @frozen_read_reqs: current read requests for fleecing user in bs->file
+ * node. These areas must not be rewritten by guest.
+ */
+ BlockReqList frozen_read_reqs;
} BDRVCopyBeforeWriteState;
static coroutine_fn int cbw_co_preadv(
@@ -46,10 +73,20 @@ static coroutine_fn int cbw_co_preadv(
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
+/*
+ * Do copy-before-write operation.
+ *
+ * On failure, the guest request must be failed too.
+ *
+ * On success, we also wait for all in-flight fleecing read requests in the
+ * source node. After a successful return of cbw_do_copy_before_write() it's
+ * guaranteed that there are no such requests and that none will ever appear.
+ */
static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
uint64_t offset, uint64_t bytes, BdrvRequestFlags flags)
{
BDRVCopyBeforeWriteState *s = bs->opaque;
+ int ret;
uint64_t off, end;
int64_t cluster_size = block_copy_cluster_size(s->bcs);
@@ -60,7 +97,17 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
off = QEMU_ALIGN_DOWN(offset, cluster_size);
end = QEMU_ALIGN_UP(offset + bytes, cluster_size);
- return block_copy(s->bcs, off, end - off, true);
+ ret = block_copy(s->bcs, off, end - off, true);
+ if (ret < 0) {
+ return ret;
+ }
+
+ WITH_QEMU_LOCK_GUARD(&s->lock) {
+ bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
+ reqlist_wait_all(&s->frozen_read_reqs, off, end - off, &s->lock);
+ }
+
+ return 0;
}
static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs,
@@ -108,6 +155,142 @@ static int coroutine_fn cbw_co_flush(BlockDriverState *bs)
return bdrv_co_flush(bs->file->bs);
}
+/*
+ * If @offset is not accessible, return NULL.
+ *
+ * Otherwise, set @pnum to a number of bytes that are accessible from @file
+ * (@file is set to bs->file or to s->target). Return a newly allocated
+ * BlockReq object that should then be passed to cbw_snapshot_read_unlock().
+ *
+ * It's guaranteed that guest writes will not modify the region until
+ * cbw_snapshot_read_unlock() is called.
+ */
+static BlockReq *cbw_snapshot_read_lock(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, BdrvChild **file)
+{
+ BDRVCopyBeforeWriteState *s = bs->opaque;
+ BlockReq *req = g_new(BlockReq, 1);
+ bool done;
+
+ QEMU_LOCK_GUARD(&s->lock);
+
+ if (bdrv_dirty_bitmap_next_zero(s->access_bitmap, offset, bytes) != -1) {
+ g_free(req);
+ return NULL;
+ }
+
+ done = bdrv_dirty_bitmap_status(s->done_bitmap, offset, bytes, pnum);
+ if (done) {
+ /*
+ * Special invalid BlockReq that is handled in
+ * cbw_snapshot_read_unlock(). We don't need to lock anything to read
+ * from s->target.
+ */
+ *req = (BlockReq) {.offset = -1, .bytes = -1};
+ *file = s->target;
+ } else {
+ reqlist_init_req(&s->frozen_read_reqs, req, offset, bytes);
+ *file = bs->file;
+ }
+
+ return req;
+}
+
+static void cbw_snapshot_read_unlock(BlockDriverState *bs, BlockReq *req)
+{
+ BDRVCopyBeforeWriteState *s = bs->opaque;
+
+ if (req->offset == -1 && req->bytes == -1) {
+ g_free(req);
+ return;
+ }
+
+ QEMU_LOCK_GUARD(&s->lock);
+
+ reqlist_remove_req(req);
+ g_free(req);
+}
+
+static coroutine_fn int
+cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset)
+{
+ BlockReq *req;
+ BdrvChild *file;
+ int ret;
+
+ /* TODO: upgrade to async loop using AioTask */
+ while (bytes) {
+ int64_t cur_bytes;
+
+ req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &file);
+ if (!req) {
+ return -EACCES;
+ }
+
+ ret = bdrv_co_preadv_part(file, offset, cur_bytes,
+ qiov, qiov_offset, 0);
+ cbw_snapshot_read_unlock(bs, req);
+ if (ret < 0) {
+ return ret;
+ }
+
+ bytes -= cur_bytes;
+ offset += cur_bytes;
+ qiov_offset += cur_bytes;
+ }
+
+ return 0;
+}
+
+static int coroutine_fn
+cbw_co_snapshot_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
+{
+ BDRVCopyBeforeWriteState *s = bs->opaque;
+ BlockReq *req;
+ int ret;
+ int64_t cur_bytes;
+ BdrvChild *child;
+
+ req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &child);
+ if (!req) {
+ return -EACCES;
+ }
+
+ ret = bdrv_block_status(child->bs, offset, cur_bytes, pnum, map, file);
+ if (child == s->target) {
+ /*
+ * We refer to s->target only for areas that we've written to it.
+ * And we can not report unallocated blocks in s->target: this will
+ * break generic block-status-above logic, that will go to
+ * copy-before-write filtered child in this case.
+ */
+ assert(ret & BDRV_BLOCK_ALLOCATED);
+ }
+
+ cbw_snapshot_read_unlock(bs, req);
+
+ return ret;
+}
+
+static int coroutine_fn cbw_co_pdiscard_snapshot(BlockDriverState *bs,
+ int64_t offset, int64_t bytes)
+{
+ BDRVCopyBeforeWriteState *s = bs->opaque;
+
+ WITH_QEMU_LOCK_GUARD(&s->lock) {
+ bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
+ }
+
+ block_copy_reset(s->bcs, offset, bytes);
+
+ return bdrv_co_pdiscard(s->target, offset, bytes);
+}
+
static void cbw_refresh_filename(BlockDriverState *bs)
{
pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
@@ -145,11 +328,54 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
}
}
+static bool cbw_parse_bitmap_option(QDict *options, BdrvDirtyBitmap **bitmap,
+ Error **errp)
+{
+ QDict *bitmap_qdict = NULL;
+ BlockDirtyBitmap *bmp_param = NULL;
+ Visitor *v = NULL;
+ bool ret = false;
+
+ *bitmap = NULL;
+
+ qdict_extract_subqdict(options, &bitmap_qdict, "bitmap.");
+ if (!qdict_size(bitmap_qdict)) {
+ ret = true;
+ goto out;
+ }
+
+ v = qobject_input_visitor_new_flat_confused(bitmap_qdict, errp);
+ if (!v) {
+ goto out;
+ }
+
+ visit_type_BlockDirtyBitmap(v, NULL, &bmp_param, errp);
+ if (!bmp_param) {
+ goto out;
+ }
+
+ *bitmap = block_dirty_bitmap_lookup(bmp_param->node, bmp_param->name, NULL,
+ errp);
+ if (!*bitmap) {
+ goto out;
+ }
+
+ ret = true;
+
+out:
+ qapi_free_BlockDirtyBitmap(bmp_param);
+ visit_free(v);
+ qobject_unref(bitmap_qdict);
+
+ return ret;
+}
+
static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVCopyBeforeWriteState *s = bs->opaque;
- BdrvDirtyBitmap *copy_bitmap;
+ BdrvDirtyBitmap *bitmap = NULL;
+ int64_t cluster_size;
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -164,6 +390,10 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
+ if (!cbw_parse_bitmap_option(options, &bitmap, errp)) {
+ return -EINVAL;
+ }
+
bs->total_sectors = bs->file->bs->total_sectors;
bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
(BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
@@ -171,14 +401,32 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
bs->file->bs->supported_zero_flags);
- s->bcs = block_copy_state_new(bs->file, s->target, errp);
+ s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
if (!s->bcs) {
error_prepend(errp, "Cannot create block-copy-state: ");
return -EINVAL;
}
- copy_bitmap = block_copy_dirty_bitmap(s->bcs);
- bdrv_set_dirty_bitmap(copy_bitmap, 0, bdrv_dirty_bitmap_size(copy_bitmap));
+ cluster_size = block_copy_cluster_size(s->bcs);
+
+ s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
+ if (!s->done_bitmap) {
+ return -EINVAL;
+ }
+ bdrv_disable_dirty_bitmap(s->done_bitmap);
+
+ /* s->access_bitmap starts equal to bcs bitmap */
+ s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
+ if (!s->access_bitmap) {
+ return -EINVAL;
+ }
+ bdrv_disable_dirty_bitmap(s->access_bitmap);
+ bdrv_dirty_bitmap_merge_internal(s->access_bitmap,
+ block_copy_dirty_bitmap(s->bcs), NULL,
+ true);
+
+ qemu_co_mutex_init(&s->lock);
+ QLIST_INIT(&s->frozen_read_reqs);
return 0;
}
@@ -187,6 +435,9 @@ static void cbw_close(BlockDriverState *bs)
{
BDRVCopyBeforeWriteState *s = bs->opaque;
+ bdrv_release_dirty_bitmap(s->access_bitmap);
+ bdrv_release_dirty_bitmap(s->done_bitmap);
+
block_copy_state_free(s->bcs);
s->bcs = NULL;
}
@@ -204,6 +455,10 @@ BlockDriver bdrv_cbw_filter = {
.bdrv_co_pdiscard = cbw_co_pdiscard,
.bdrv_co_flush = cbw_co_flush,
+ .bdrv_co_preadv_snapshot = cbw_co_preadv_snapshot,
+ .bdrv_co_pdiscard_snapshot = cbw_co_pdiscard_snapshot,
+ .bdrv_co_snapshot_block_status = cbw_co_snapshot_block_status,
+
.bdrv_refresh_filename = cbw_refresh_filename,
.bdrv_child_perm = cbw_child_perm,
@@ -223,6 +478,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
QDict *opts;
assert(source->total_sectors == target->total_sectors);
+ GLOBAL_STATE_CODE();
opts = qdict_new();
qdict_put_str(opts, "driver", "copy-before-write");
@@ -245,6 +501,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
void bdrv_cbw_drop(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
bdrv_drop_filter(bs, &error_abort);
bdrv_unref(bs);
}
diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index 51847e711a..6e72bb25e9 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -29,6 +29,13 @@
#include "block/block_int.h"
#include "block/block-copy.h"
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
BlockDriverState *target,
const char *filter_node_name,
diff --git a/block/coroutines.h b/block/coroutines.h
index c8c14a29c8..b293e943c8 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -30,17 +30,17 @@
/* For blk_bs() in generated block/block-gen.c */
#include "sysemu/block-backend.h"
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix);
int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
-int generated_co_wrapper
-bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
- QEMUIOVector *qiov, BdrvRequestFlags flags);
-int generated_co_wrapper
-bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
- QEMUIOVector *qiov, BdrvRequestFlags flags);
-
int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
@@ -52,6 +52,51 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
int64_t *map,
BlockDriverState **file,
int *depth);
+
+int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
+ QEMUIOVector *qiov, int64_t pos);
+int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
+ QEMUIOVector *qiov, int64_t pos);
+
+int coroutine_fn
+nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp);
+
+
+int coroutine_fn
+blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
+
+
+int coroutine_fn
+blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags);
+
+int coroutine_fn
+blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
+
+int coroutine_fn
+blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
+
+int coroutine_fn blk_co_do_flush(BlockBackend *blk);
+
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * See include/block/block-io.h for more information about
+ * the "I/O or GS" API.
+ */
+
+int generated_co_wrapper
+bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
+
+int generated_co_wrapper
+bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
+
int generated_co_wrapper
bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
@@ -63,46 +108,24 @@ bdrv_common_block_status_above(BlockDriverState *bs,
int64_t *map,
BlockDriverState **file,
int *depth);
-
-int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
- QEMUIOVector *qiov, int64_t pos);
-int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
- QEMUIOVector *qiov, int64_t pos);
-
int generated_co_wrapper
nbd_do_establish_connection(BlockDriverState *bs, Error **errp);
-int coroutine_fn
-nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp);
-
int generated_co_wrapper
blk_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags);
-int coroutine_fn
-blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, BdrvRequestFlags flags);
-
int generated_co_wrapper
blk_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags);
-int coroutine_fn
-blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags);
int generated_co_wrapper
blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
-int coroutine_fn
-blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
int generated_co_wrapper
blk_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
-int coroutine_fn
-blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
int generated_co_wrapper blk_do_flush(BlockBackend *blk);
-int coroutine_fn blk_co_do_flush(BlockBackend *blk);
#endif /* BLOCK_COROUTINES_INT_H */
diff --git a/block/create.c b/block/create.c
index 89812669df..4df43f11f4 100644
--- a/block/create.c
+++ b/block/create.c
@@ -42,6 +42,8 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp)
BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common);
int ret;
+ GLOBAL_STATE_CODE();
+
job_progress_set_remaining(&s->common, 1);
ret = s->drv->bdrv_co_create(s->opts, errp);
job_progress_update(&s->common, 1);
diff --git a/block/crypto.c b/block/crypto.c
index c8ba4681e2..1ba82984ef 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -31,6 +31,7 @@
#include "qemu/module.h"
#include "qemu/option.h"
#include "qemu/cutils.h"
+#include "qemu/memalign.h"
#include "crypto.h"
typedef struct BlockCrypto BlockCrypto;
@@ -778,36 +779,54 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, Error **errp)
}
static int
+block_crypto_amend_prepare(BlockDriverState *bs, Error **errp)
+{
+ BlockCrypto *crypto = bs->opaque;
+ int ret;
+
+ /* apply for exclusive read/write permissions to the underlying file */
+ crypto->updating_keys = true;
+ ret = bdrv_child_refresh_perms(bs, bs->file, errp);
+ if (ret < 0) {
+ /* Well, in this case we will not be updating any keys */
+ crypto->updating_keys = false;
+ }
+ return ret;
+}
+
+static void
+block_crypto_amend_cleanup(BlockDriverState *bs)
+{
+ BlockCrypto *crypto = bs->opaque;
+ Error *errp = NULL;
+
+ /* release exclusive read/write permissions to the underlying file */
+ crypto->updating_keys = false;
+ bdrv_child_refresh_perms(bs, bs->file, &errp);
+
+ if (errp) {
+ error_report_err(errp);
+ }
+}
+
+static int
block_crypto_amend_options_generic_luks(BlockDriverState *bs,
QCryptoBlockAmendOptions *amend_options,
bool force,
Error **errp)
{
BlockCrypto *crypto = bs->opaque;
- int ret;
assert(crypto);
assert(crypto->block);
- /* apply for exclusive read/write permissions to the underlying file*/
- crypto->updating_keys = true;
- ret = bdrv_child_refresh_perms(bs, bs->file, errp);
- if (ret) {
- goto cleanup;
- }
-
- ret = qcrypto_block_amend_options(crypto->block,
- block_crypto_read_func,
- block_crypto_write_func,
- bs,
- amend_options,
- force,
- errp);
-cleanup:
- /* release exclusive read/write permissions to the underlying file*/
- crypto->updating_keys = false;
- bdrv_child_refresh_perms(bs, bs->file, errp);
- return ret;
+ return qcrypto_block_amend_options(crypto->block,
+ block_crypto_read_func,
+ block_crypto_write_func,
+ bs,
+ amend_options,
+ force,
+ errp);
}
static int
@@ -833,8 +852,16 @@ block_crypto_amend_options_luks(BlockDriverState *bs,
if (!amend_options) {
goto cleanup;
}
+
+ ret = block_crypto_amend_prepare(bs, errp);
+ if (ret) {
+ goto perm_cleanup;
+ }
ret = block_crypto_amend_options_generic_luks(bs, amend_options,
force, errp);
+
+perm_cleanup:
+ block_crypto_amend_cleanup(bs);
cleanup:
qapi_free_QCryptoBlockAmendOptions(amend_options);
return ret;
@@ -931,6 +958,8 @@ static BlockDriver bdrv_crypto_luks = {
.bdrv_get_specific_info = block_crypto_get_specific_info_luks,
.bdrv_amend_options = block_crypto_amend_options_luks,
.bdrv_co_amend = block_crypto_co_amend_luks,
+ .bdrv_amend_pre_run = block_crypto_amend_prepare,
+ .bdrv_amend_clean = block_crypto_amend_cleanup,
.is_format = true,
diff --git a/block/curl.c b/block/curl.c
index 6a6cd72975..1e0f609579 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -458,38 +458,51 @@ static int curl_init_state(BDRVCURLState *s, CURLState *state)
if (!state->curl) {
return -EIO;
}
- curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
- curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
- (long) s->sslverify);
- curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYHOST,
- s->sslverify ? 2L : 0L);
+ if (curl_easy_setopt(state->curl, CURLOPT_URL, s->url) ||
+ curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
+ (long) s->sslverify) ||
+ curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYHOST,
+ s->sslverify ? 2L : 0L)) {
+ goto err;
+ }
if (s->cookie) {
- curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie);
+ if (curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie)) {
+ goto err;
+ }
+ }
+ if (curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout) ||
+ curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
+ (void *)curl_read_cb) ||
+ curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state) ||
+ curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state) ||
+ curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1) ||
+ curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1) ||
+ curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1) ||
+ curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg) ||
+ curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1)) {
+ goto err;
}
- curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout);
- curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
- (void *)curl_read_cb);
- curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
- curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state);
- curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1);
- curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1);
- curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1);
- curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
- curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);
-
if (s->username) {
- curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username);
+ if (curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username)) {
+ goto err;
+ }
}
if (s->password) {
- curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password);
+ if (curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password)) {
+ goto err;
+ }
}
if (s->proxyusername) {
- curl_easy_setopt(state->curl,
- CURLOPT_PROXYUSERNAME, s->proxyusername);
+ if (curl_easy_setopt(state->curl,
+ CURLOPT_PROXYUSERNAME, s->proxyusername)) {
+ goto err;
+ }
}
if (s->proxypassword) {
- curl_easy_setopt(state->curl,
- CURLOPT_PROXYPASSWORD, s->proxypassword);
+ if (curl_easy_setopt(state->curl,
+ CURLOPT_PROXYPASSWORD, s->proxypassword)) {
+ goto err;
+ }
}
/* Restrict supported protocols to avoid security issues in the more
@@ -499,18 +512,27 @@ static int curl_init_state(BDRVCURLState *s, CURLState *state)
* Restricting protocols is only supported from 7.19.4 upwards.
*/
#if LIBCURL_VERSION_NUM >= 0x071304
- curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS);
- curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS);
+ if (curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS) ||
+ curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS)) {
+ goto err;
+ }
#endif
#ifdef DEBUG_VERBOSE
- curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1);
+ if (curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1)) {
+ goto err;
+ }
#endif
}
state->s = s;
return 0;
+
+err:
+ curl_easy_cleanup(state->curl);
+ state->curl = NULL;
+ return -EIO;
}
/* Called with s->mutex held. */
@@ -759,14 +781,19 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
// Get file size
if (curl_init_state(s, state) < 0) {
+ pstrcpy(state->errmsg, CURL_ERROR_SIZE,
+ "curl library initialization failed.");
goto out;
}
s->accept_range = false;
- curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1);
- curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION,
- curl_header_cb);
- curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s);
+ if (curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1) ||
+ curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION, curl_header_cb) ||
+ curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s)) {
+ pstrcpy(state->errmsg, CURL_ERROR_SIZE,
+ "curl library initialization failed.");
+ goto out;
+ }
if (curl_easy_perform(state->curl))
goto out;
if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d)) {
@@ -879,9 +906,8 @@ static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
snprintf(state->range, 127, "%" PRIu64 "-%" PRIu64, start, end);
trace_curl_setup_preadv(acb->bytes, start, state->range);
- curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);
-
- if (curl_multi_add_handle(s->multi, state->curl) != CURLM_OK) {
+ if (curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range) ||
+ curl_multi_add_handle(s->multi, state->curl) != CURLM_OK) {
state->acb[0] = NULL;
acb->ret = -EIO;
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 0ef46163e3..da1b91166f 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -496,6 +496,7 @@ static void coroutine_fn bdrv_co_can_store_new_dirty_bitmap_entry(void *opaque)
bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
uint32_t granularity, Error **errp)
{
+ IO_CODE();
if (qemu_in_coroutine()) {
return bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp);
} else {
@@ -656,6 +657,7 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
{
+ IO_CODE();
assert(!bdrv_dirty_bitmap_readonly(bitmap));
bdrv_dirty_bitmaps_lock(bitmap->bs);
if (!out) {
@@ -673,6 +675,7 @@ void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup)
{
HBitmap *tmp = bitmap->bitmap;
assert(!bdrv_dirty_bitmap_readonly(bitmap));
+ GLOBAL_STATE_CODE();
bitmap->bitmap = backup;
hbitmap_free(tmp);
}
@@ -737,6 +740,7 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
BdrvDirtyBitmap *bitmap;
+ IO_CODE();
if (QLIST_EMPTY(&bs->dirty_bitmaps)) {
return;
@@ -875,16 +879,25 @@ bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap,
dirty_start, dirty_count);
}
+bool bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap, int64_t offset,
+ int64_t bytes, int64_t *count)
+{
+ return hbitmap_status(bitmap->bitmap, offset, bytes, count);
+}
+
/**
* bdrv_merge_dirty_bitmap: merge src into dest.
* Ensures permissions on bitmaps are reasonable; use for public API.
*
* @backup: If provided, make a copy of dest here prior to merge.
+ *
+ * Returns true on success, false on failure. In case of failure bitmaps are
+ * untouched.
*/
-void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
+bool bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
HBitmap **backup, Error **errp)
{
- bool ret;
+ bool ret = false;
bdrv_dirty_bitmaps_lock(dest->bs);
if (src->bs != dest->bs) {
@@ -912,6 +925,8 @@ out:
if (src->bs != dest->bs) {
bdrv_dirty_bitmaps_unlock(src->bs);
}
+
+ return ret;
}
/**
@@ -928,6 +943,7 @@ bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
bool lock)
{
bool ret;
+ IO_CODE();
assert(!bdrv_dirty_bitmap_readonly(dest));
assert(!bdrv_dirty_bitmap_inconsistent(dest));
diff --git a/block/dmg.c b/block/dmg.c
index 447901fbb8..c626587f9c 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -27,6 +27,7 @@
#include "qemu/bswap.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
+#include "qemu/memalign.h"
#include "dmg.h"
int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
diff --git a/block/export/export.c b/block/export/export.c
index 6d3b9964c8..7253af3bc3 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -139,7 +139,7 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
* access since the export could be available before migration handover.
* ctx was acquired in the caller.
*/
- bdrv_invalidate_cache(bs, NULL);
+ bdrv_activate(bs, NULL);
perm = BLK_PERM_CONSISTENT_READ;
if (export->writable) {
diff --git a/block/export/fuse.c b/block/export/fuse.c
index fdda8e3c81..e80b24a867 100644
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@@ -19,6 +19,7 @@
#define FUSE_USE_VERSION 31
#include "qemu/osdep.h"
+#include "qemu/memalign.h"
#include "block/aio.h"
#include "block/block.h"
#include "block/export.h"
@@ -86,8 +87,8 @@ static int fuse_export_create(BlockExport *blk_exp,
assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
- /* For growable exports, take the RESIZE permission */
- if (args->growable) {
+ /* For growable and writable exports, take the RESIZE permission */
+ if (args->growable || blk_exp_args->writable) {
uint64_t blk_perm, blk_shared_perm;
blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
@@ -392,14 +393,23 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size,
{
uint64_t blk_perm, blk_shared_perm;
BdrvRequestFlags truncate_flags = 0;
- int ret;
+ bool add_resize_perm;
+ int ret, ret_check;
+
+ /* Growable and writable exports have a permanent RESIZE permission */
+ add_resize_perm = !exp->growable && !exp->writable;
if (req_zero_write) {
truncate_flags |= BDRV_REQ_ZERO_WRITE;
}
- /* Growable exports have a permanent RESIZE permission */
- if (!exp->growable) {
+ if (add_resize_perm) {
+
+ if (!qemu_in_main_thread()) {
+ /* Changing permissions like below only works in the main thread */
+ return -EPERM;
+ }
+
blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
@@ -412,9 +422,11 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size,
ret = blk_truncate(exp->common.blk, size, true, prealloc,
truncate_flags, NULL);
- if (!exp->growable) {
+ if (add_resize_perm) {
/* Must succeed, because we are only giving up the RESIZE permission */
- blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
+ ret_check = blk_set_perm(exp->common.blk, blk_perm,
+ blk_shared_perm, &error_abort);
+ assert(ret_check == 0);
}
return ret;
diff --git a/block/file-posix.c b/block/file-posix.c
index 1f1756e192..c000a61db2 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -31,6 +31,7 @@
#include "qemu/module.h"
#include "qemu/option.h"
#include "qemu/units.h"
+#include "qemu/memalign.h"
#include "trace.h"
#include "block/thread-pool.h"
#include "qemu/iov.h"
diff --git a/block/io.c b/block/io.c
index 4e4cb556c5..3280144a17 100644
--- a/block/io.c
+++ b/block/io.c
@@ -32,6 +32,7 @@
#include "block/coroutines.h"
#include "block/write-threshold.h"
#include "qemu/cutils.h"
+#include "qemu/memalign.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
@@ -70,6 +71,7 @@ static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
void bdrv_parent_drained_end_single(BdrvChild *c)
{
int drained_end_counter = 0;
+ IO_OR_GS_CODE();
bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
BDRV_POLL_WHILE(c->bs, qatomic_read(&drained_end_counter) > 0);
}
@@ -114,6 +116,7 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
{
+ IO_OR_GS_CODE();
c->parent_quiesce_counter++;
if (c->klass->drained_begin) {
c->klass->drained_begin(c);
@@ -164,6 +167,8 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
BdrvChild *c;
bool have_limits;
+ GLOBAL_STATE_CODE();
+
if (tran) {
BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
*s = (BdrvRefreshLimitsState) {
@@ -189,10 +194,6 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
QLIST_FOREACH(c, &bs->children, next) {
if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
{
- bdrv_refresh_limits(c->bs, tran, errp);
- if (*errp) {
- return;
- }
bdrv_merge_limits(&bs->bl, &c->bs->bl);
have_limits = true;
}
@@ -226,12 +227,14 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
*/
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
+ IO_CODE();
qatomic_inc(&bs->copy_on_read);
}
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
int old = qatomic_fetch_dec(&bs->copy_on_read);
+ IO_CODE();
assert(old >= 1);
}
@@ -303,6 +306,7 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
BdrvChild *ignore_parent, bool ignore_bds_parents)
{
BdrvChild *child, *next;
+ IO_OR_GS_CODE();
if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
return true;
@@ -426,6 +430,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
BdrvChild *parent, bool ignore_bds_parents)
{
+ IO_OR_GS_CODE();
assert(!qemu_in_coroutine());
/* Stop things in parent-to-child order */
@@ -477,11 +482,13 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
void bdrv_drained_begin(BlockDriverState *bs)
{
+ IO_OR_GS_CODE();
bdrv_do_drained_begin(bs, false, NULL, false, true);
}
void bdrv_subtree_drained_begin(BlockDriverState *bs)
{
+ IO_OR_GS_CODE();
bdrv_do_drained_begin(bs, true, NULL, false, true);
}
@@ -538,18 +545,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
void bdrv_drained_end(BlockDriverState *bs)
{
int drained_end_counter = 0;
+ IO_OR_GS_CODE();
bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}
void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
{
+ IO_CODE();
bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
}
void bdrv_subtree_drained_end(BlockDriverState *bs)
{
int drained_end_counter = 0;
+ IO_OR_GS_CODE();
bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}
@@ -557,6 +567,7 @@ void bdrv_subtree_drained_end(BlockDriverState *bs)
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
{
int i;
+ IO_OR_GS_CODE();
for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
bdrv_do_drained_begin(child->bs, true, child, false, true);
@@ -567,6 +578,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
{
int drained_end_counter = 0;
int i;
+ IO_OR_GS_CODE();
for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
bdrv_do_drained_end(child->bs, true, child, false,
@@ -585,6 +597,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
*/
void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
+ IO_OR_GS_CODE();
assert(qemu_in_coroutine());
bdrv_drained_begin(bs);
bdrv_drained_end(bs);
@@ -592,6 +605,7 @@ void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
void bdrv_drain(BlockDriverState *bs)
{
+ IO_OR_GS_CODE();
bdrv_drained_begin(bs);
bdrv_drained_end(bs);
}
@@ -612,6 +626,7 @@ static bool bdrv_drain_all_poll(void)
{
BlockDriverState *bs = NULL;
bool result = false;
+ GLOBAL_STATE_CODE();
/* bdrv_drain_poll() can't make changes to the graph and we are holding the
* main AioContext lock, so iterating bdrv_next_all_states() is safe. */
@@ -640,6 +655,7 @@ static bool bdrv_drain_all_poll(void)
void bdrv_drain_all_begin(void)
{
BlockDriverState *bs = NULL;
+ GLOBAL_STATE_CODE();
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
@@ -682,6 +698,7 @@ void bdrv_drain_all_begin(void)
void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
{
int drained_end_counter = 0;
+ GLOBAL_STATE_CODE();
g_assert(bs->quiesce_counter > 0);
g_assert(!bs->refcnt);
@@ -696,6 +713,7 @@ void bdrv_drain_all_end(void)
{
BlockDriverState *bs = NULL;
int drained_end_counter = 0;
+ GLOBAL_STATE_CODE();
/*
* bdrv queue is managed by record/replay,
@@ -723,6 +741,7 @@ void bdrv_drain_all_end(void)
void bdrv_drain_all(void)
{
+ GLOBAL_STATE_CODE();
bdrv_drain_all_begin();
bdrv_drain_all_end();
}
@@ -867,6 +886,7 @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
{
BdrvTrackedRequest *req;
Coroutine *self = qemu_coroutine_self();
+ IO_CODE();
QLIST_FOREACH(req, &bs->tracked_requests, list) {
if (req->co == self) {
@@ -886,7 +906,7 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
int64_t *cluster_bytes)
{
BlockDriverInfo bdi;
-
+ IO_CODE();
if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
*cluster_offset = offset;
*cluster_bytes = bytes;
@@ -912,16 +932,19 @@ static int bdrv_get_cluster_size(BlockDriverState *bs)
void bdrv_inc_in_flight(BlockDriverState *bs)
{
+ IO_CODE();
qatomic_inc(&bs->in_flight);
}
void bdrv_wakeup(BlockDriverState *bs)
{
+ IO_CODE();
aio_wait_kick();
}
void bdrv_dec_in_flight(BlockDriverState *bs)
{
+ IO_CODE();
qatomic_dec(&bs->in_flight);
bdrv_wakeup(bs);
}
@@ -946,6 +969,7 @@ bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
uint64_t align)
{
bool waited;
+ IO_CODE();
qemu_co_mutex_lock(&req->bs->reqs_lock);
@@ -1040,6 +1064,7 @@ static int bdrv_check_request32(int64_t offset, int64_t bytes,
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes, BdrvRequestFlags flags)
{
+ IO_CODE();
return bdrv_pwritev(child, offset, bytes, NULL,
BDRV_REQ_ZERO_WRITE | flags);
}
@@ -1058,6 +1083,7 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
int ret;
int64_t target_size, bytes, offset = 0;
BlockDriverState *bs = child->bs;
+ IO_CODE();
target_size = bdrv_getlength(bs);
if (target_size < 0) {
@@ -1090,6 +1116,7 @@ int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes)
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_CODE();
if (bytes < 0) {
return -EINVAL;
@@ -1111,6 +1138,7 @@ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_CODE();
if (bytes < 0) {
return -EINVAL;
@@ -1131,6 +1159,7 @@ int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
const void *buf, int64_t count)
{
int ret;
+ IO_CODE();
ret = bdrv_pwrite(child, offset, buf, count);
if (ret < 0) {
@@ -1797,6 +1826,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
+ IO_CODE();
return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
}
@@ -1809,6 +1839,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
BdrvTrackedRequest req;
BdrvRequestPadding pad;
int ret;
+ IO_CODE();
trace_bdrv_co_preadv_part(bs, offset, bytes, flags);
@@ -2173,6 +2204,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
padding = bdrv_init_padding(bs, offset, bytes, &pad);
if (padding) {
+ assert(!(flags & BDRV_REQ_NO_WAIT));
bdrv_make_request_serialising(req, align);
bdrv_padding_rmw_read(child, req, &pad, true);
@@ -2230,6 +2262,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
+ IO_CODE();
return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
}
@@ -2243,6 +2276,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
BdrvRequestPadding pad;
int ret;
bool padded = false;
+ IO_CODE();
trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);
@@ -2307,6 +2341,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
* serialize the request to prevent interactions of the
* widened region with other transactions.
*/
+ assert(!(flags & BDRV_REQ_NO_WAIT));
bdrv_make_request_serialising(&req, align);
bdrv_padding_rmw_read(child, &req, &pad, false);
}
@@ -2326,6 +2361,7 @@ out:
int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes, BdrvRequestFlags flags)
{
+ IO_CODE();
trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
@@ -2345,6 +2381,8 @@ int bdrv_flush_all(void)
BlockDriverState *bs = NULL;
int result = 0;
+ GLOBAL_STATE_CODE();
+
/*
* bdrv queue is managed by record/replay,
* creating new flush request for stopping
@@ -2639,6 +2677,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState *p;
int64_t eof = 0;
int dummy;
+ IO_CODE();
assert(!include_base || base); /* Can't include NULL base */
@@ -2728,6 +2767,7 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file)
{
+ IO_CODE();
return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
pnum, map, file, NULL);
}
@@ -2735,6 +2775,7 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map, BlockDriverState **file)
{
+ IO_CODE();
return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
offset, bytes, pnum, map, file);
}
@@ -2751,6 +2792,7 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
{
int ret;
int64_t pnum = bytes;
+ IO_CODE();
if (!bytes) {
return 1;
@@ -2771,6 +2813,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
{
int ret;
int64_t dummy;
+ IO_CODE();
ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
bytes, pnum ? pnum : &dummy, NULL,
@@ -2807,6 +2850,7 @@ int bdrv_is_allocated_above(BlockDriverState *top,
int ret = bdrv_common_block_status_above(top, base, include_base, false,
offset, bytes, pnum, NULL, NULL,
&depth);
+ IO_CODE();
if (ret < 0) {
return ret;
}
@@ -2823,6 +2867,7 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
BlockDriver *drv = bs->drv;
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret;
+ IO_CODE();
ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
if (ret < 0) {
@@ -2854,6 +2899,7 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
BlockDriver *drv = bs->drv;
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret;
+ IO_CODE();
ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
if (ret < 0) {
@@ -2884,6 +2930,7 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
int ret = bdrv_writev_vmstate(bs, &qiov, pos);
+ IO_CODE();
return ret < 0 ? ret : size;
}
@@ -2893,6 +2940,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
int ret = bdrv_readv_vmstate(bs, &qiov, pos);
+ IO_CODE();
return ret < 0 ? ret : size;
}
@@ -2902,6 +2950,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
void bdrv_aio_cancel(BlockAIOCB *acb)
{
+ IO_CODE();
qemu_aio_ref(acb);
bdrv_aio_cancel_async(acb);
while (acb->refcnt > 1) {
@@ -2926,6 +2975,7 @@ void bdrv_aio_cancel(BlockAIOCB *acb)
* In either case the completion callback must be called. */
void bdrv_aio_cancel_async(BlockAIOCB *acb)
{
+ IO_CODE();
if (acb->aiocb_info->cancel_async) {
acb->aiocb_info->cancel_async(acb);
}
@@ -2940,6 +2990,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
BdrvChild *child;
int current_gen;
int ret = 0;
+ IO_CODE();
bdrv_inc_in_flight(bs);
@@ -3065,6 +3116,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
int64_t max_pdiscard;
int head, tail, align;
BlockDriverState *bs = child->bs;
+ IO_CODE();
if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
return -ENOMEDIUM;
@@ -3183,6 +3235,7 @@ int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
.coroutine = qemu_coroutine_self(),
};
BlockAIOCB *acb;
+ IO_CODE();
bdrv_inc_in_flight(bs);
if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
@@ -3207,17 +3260,20 @@ out:
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
+ IO_CODE();
return qemu_memalign(bdrv_opt_mem_align(bs), size);
}
void *qemu_blockalign0(BlockDriverState *bs, size_t size)
{
+ IO_CODE();
return memset(qemu_blockalign(bs, size), 0, size);
}
void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
{
size_t align = bdrv_opt_mem_align(bs);
+ IO_CODE();
/* Ensure that NULL is never returned on success */
assert(align > 0);
@@ -3231,6 +3287,7 @@ void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
{
void *mem = qemu_try_blockalign(bs, size);
+ IO_CODE();
if (mem) {
memset(mem, 0, size);
@@ -3246,6 +3303,7 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
int i;
size_t alignment = bdrv_min_mem_align(bs);
+ IO_CODE();
for (i = 0; i < qiov->niov; i++) {
if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
@@ -3262,6 +3320,7 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
void bdrv_io_plug(BlockDriverState *bs)
{
BdrvChild *child;
+ IO_CODE();
QLIST_FOREACH(child, &bs->children, next) {
bdrv_io_plug(child->bs);
@@ -3278,6 +3337,7 @@ void bdrv_io_plug(BlockDriverState *bs)
void bdrv_io_unplug(BlockDriverState *bs)
{
BdrvChild *child;
+ IO_CODE();
assert(bs->io_plugged);
if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
@@ -3296,6 +3356,7 @@ void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
{
BdrvChild *child;
+ GLOBAL_STATE_CODE();
if (bs->drv && bs->drv->bdrv_register_buf) {
bs->drv->bdrv_register_buf(bs, host, size);
}
@@ -3308,6 +3369,7 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
{
BdrvChild *child;
+ GLOBAL_STATE_CODE();
if (bs->drv && bs->drv->bdrv_unregister_buf) {
bs->drv->bdrv_unregister_buf(bs, host);
}
@@ -3328,6 +3390,8 @@ static int coroutine_fn bdrv_co_copy_range_internal(
/* TODO We can support BDRV_REQ_NO_FALLBACK here */
assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
+ assert(!(read_flags & BDRV_REQ_NO_WAIT));
+ assert(!(write_flags & BDRV_REQ_NO_WAIT));
if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) {
return -ENOMEDIUM;
@@ -3402,6 +3466,7 @@ int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
+ IO_CODE();
trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
read_flags, write_flags);
return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
@@ -3418,6 +3483,7 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
+ IO_CODE();
trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
read_flags, write_flags);
return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
@@ -3429,6 +3495,7 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
+ IO_CODE();
return bdrv_co_copy_range_from(src, src_offset,
dst, dst_offset,
bytes, read_flags, write_flags);
@@ -3461,7 +3528,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
BdrvTrackedRequest req;
int64_t old_size, new_bytes;
int ret;
-
+ IO_CODE();
/* if bs->drv == NULL, bs is closed, so there's nothing to do here */
if (!drv) {
@@ -3579,6 +3646,7 @@ out:
void bdrv_cancel_in_flight(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
if (!bs || !bs->drv) {
return;
}
@@ -3587,3 +3655,75 @@ void bdrv_cancel_in_flight(BlockDriverState *bs)
bs->drv->bdrv_cancel_in_flight(bs);
}
}
+
+int coroutine_fn
+bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset)
+{
+ BlockDriverState *bs = child->bs;
+ BlockDriver *drv = bs->drv;
+ int ret;
+ IO_CODE();
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+
+ if (!drv->bdrv_co_preadv_snapshot) {
+ return -ENOTSUP;
+ }
+
+ bdrv_inc_in_flight(bs);
+ ret = drv->bdrv_co_preadv_snapshot(bs, offset, bytes, qiov, qiov_offset);
+ bdrv_dec_in_flight(bs);
+
+ return ret;
+}
+
+int coroutine_fn
+bdrv_co_snapshot_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
+{
+ BlockDriver *drv = bs->drv;
+ int ret;
+ IO_CODE();
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+
+ if (!drv->bdrv_co_snapshot_block_status) {
+ return -ENOTSUP;
+ }
+
+ bdrv_inc_in_flight(bs);
+ ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes,
+ pnum, map, file);
+ bdrv_dec_in_flight(bs);
+
+ return ret;
+}
+
+int coroutine_fn
+bdrv_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+ BlockDriver *drv = bs->drv;
+ int ret;
+ IO_CODE();
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+
+ if (!drv->bdrv_co_pdiscard_snapshot) {
+ return -ENOTSUP;
+ }
+
+ bdrv_inc_in_flight(bs);
+ ret = drv->bdrv_co_pdiscard_snapshot(bs, offset, bytes);
+ bdrv_dec_in_flight(bs);
+
+ return ret;
+}
diff --git a/block/meson.build b/block/meson.build
index 8a1ce58c9c..0b2a60c99b 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -32,7 +32,9 @@ block_ss.add(files(
'qcow2.c',
'quorum.c',
'raw-format.c',
+ 'reqlist.c',
'snapshot.c',
+ 'snapshot-access.c',
'throttle-groups.c',
'throttle.c',
'vhdx-endian.c',
@@ -131,8 +133,11 @@ block_ss.add(module_block_h)
wrapper_py = find_program('../scripts/block-coroutine-wrapper.py')
block_gen_c = custom_target('block-gen.c',
output: 'block-gen.c',
- input: files('../include/block/block.h',
- 'coroutines.h'),
+ input: files(
+ '../include/block/block-io.h',
+ '../include/block/block-global-state.h',
+ 'coroutines.h'
+ ),
command: [wrapper_py, '@OUTPUT@', '@INPUT@'])
block_ss.add(block_gen_c)
diff --git a/block/mirror.c b/block/mirror.c
index 69b2c1c697..d8ecb9efa2 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -23,6 +23,7 @@
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
+#include "qemu/memalign.h"
#define MAX_IN_FLIGHT 16
#define MAX_IO_BYTES (1 << 20) /* 1 Mb */
@@ -1864,6 +1865,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
bool is_none_mode;
BlockDriverState *base;
+ GLOBAL_STATE_CODE();
+
if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
(mode == MIRROR_SYNC_MODE_BITMAP)) {
error_setg(errp, "Sync mode '%s' not supported",
@@ -1889,6 +1892,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
bool base_read_only;
BlockJob *job;
+ GLOBAL_STATE_CODE();
+
base_read_only = bdrv_is_read_only(base);
if (base_read_only) {
diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c
index 9f11deec64..8e35616c2e 100644
--- a/block/monitor/bitmap-qmp-cmds.c
+++ b/block/monitor/bitmap-qmp-cmds.c
@@ -56,6 +56,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
BlockDriverState *bs;
BdrvDirtyBitmap *bitmap;
+ GLOBAL_STATE_CODE();
+
if (!node) {
error_setg(errp, "Node cannot be NULL");
return NULL;
@@ -155,6 +157,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
BdrvDirtyBitmap *bitmap;
AioContext *aio_context;
+ GLOBAL_STATE_CODE();
+
bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
if (!bitmap || !bs) {
return NULL;
@@ -259,7 +263,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
BlockDriverState *bs;
BdrvDirtyBitmap *dst, *src, *anon;
BlockDirtyBitmapMergeSourceList *lst;
- Error *local_err = NULL;
+
+ GLOBAL_STATE_CODE();
dst = block_dirty_bitmap_lookup(node, target, &bs, errp);
if (!dst) {
@@ -297,9 +302,7 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
abort();
}
- bdrv_merge_dirty_bitmap(anon, src, NULL, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (!bdrv_merge_dirty_bitmap(anon, src, NULL, errp)) {
dst = NULL;
goto out;
}
diff --git a/block/nbd.c b/block/nbd.c
index 5853d85d60..146d25660e 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -313,6 +313,7 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs,
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
int ret;
bool blocking = nbd_client_connecting_wait(s);
+ IO_CODE();
assert(!s->ioc);
diff --git a/block/nvme.c b/block/nvme.c
index dd20de3865..552029931d 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -21,6 +21,7 @@
#include "qemu/module.h"
#include "qemu/cutils.h"
#include "qemu/option.h"
+#include "qemu/memalign.h"
#include "qemu/vfio-helpers.h"
#include "block/block_int.h"
#include "sysemu/replay.h"
diff --git a/block/parallels-ext.c b/block/parallels-ext.c
index e0dd0975c6..cb22a427d7 100644
--- a/block/parallels-ext.c
+++ b/block/parallels-ext.c
@@ -29,6 +29,7 @@
#include "parallels.h"
#include "crypto/hash.h"
#include "qemu/uuid.h"
+#include "qemu/memalign.h"
#define PARALLELS_FORMAT_EXTENSION_MAGIC 0xAB234CEF23DCEA87ULL
diff --git a/block/parallels.c b/block/parallels.c
index 6ebad2a2bb..cd23e02d06 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -41,6 +41,7 @@
#include "qapi/qapi-visit-block-core.h"
#include "qemu/bswap.h"
#include "qemu/bitmap.h"
+#include "qemu/memalign.h"
#include "migration/blocker.h"
#include "parallels.h"
@@ -873,7 +874,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
s->bat_dirty_bmap =
bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
- /* Disable migration until bdrv_invalidate_cache method is added */
+ /* Disable migration until bdrv_activate method is added */
error_setg(&s->migration_blocker, "The Parallels format used by node '%s' "
"does not support live migration",
bdrv_get_device_or_node_name(bs));
diff --git a/block/preallocate.c b/block/preallocate.c
index 1d4233f730..e15cb8c74a 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -276,6 +276,10 @@ static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset,
int64_t end = offset + bytes;
int64_t prealloc_start, prealloc_end;
int ret;
+ uint32_t file_align = bs->file->bs->bl.request_alignment;
+ uint32_t prealloc_align = MAX(s->opts.prealloc_align, file_align);
+
+ assert(QEMU_IS_ALIGNED(prealloc_align, file_align));
if (!has_prealloc_perms(bs)) {
/* We don't have state neither should try to recover it */
@@ -320,9 +324,14 @@ static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset,
/* Now we want new preallocation, as request writes beyond s->file_end. */
- prealloc_start = want_merge_zero ? MIN(offset, s->file_end) : s->file_end;
- prealloc_end = QEMU_ALIGN_UP(end + s->opts.prealloc_size,
- s->opts.prealloc_align);
+ prealloc_start = QEMU_ALIGN_UP(
+ want_merge_zero ? MIN(offset, s->file_end) : s->file_end,
+ file_align);
+ prealloc_end = QEMU_ALIGN_UP(
+ MAX(prealloc_start, end) + s->opts.prealloc_size,
+ prealloc_align);
+
+ want_merge_zero = want_merge_zero && (prealloc_start <= offset);
ret = bdrv_co_pwrite_zeroes(
bs->file, prealloc_start, prealloc_end - prealloc_start,
diff --git a/block/qcow.c b/block/qcow.c
index c39940f33e..4fba1b9e36 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -32,6 +32,7 @@
#include "qemu/option.h"
#include "qemu/bswap.h"
#include "qemu/cutils.h"
+#include "qemu/memalign.h"
#include <zlib.h>
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qstring.h"
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 7444b9c4ab..8a0105911f 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -23,6 +23,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/memalign.h"
#include "qcow2.h"
#include "trace.h"
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 21884a1ab9..20a16ba6ee 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -28,6 +28,7 @@
#include "qapi/error.h"
#include "qcow2.h"
#include "qemu/bswap.h"
+#include "qemu/memalign.h"
#include "trace.h"
int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size)
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 4614572252..94033972be 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -28,6 +28,7 @@
#include "qemu/range.h"
#include "qemu/bswap.h"
#include "qemu/cutils.h"
+#include "qemu/memalign.h"
#include "trace.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 71ddb08c21..075269a023 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -29,6 +29,7 @@
#include "qemu/bswap.h"
#include "qemu/error-report.h"
#include "qemu/cutils.h"
+#include "qemu/memalign.h"
static void qcow2_free_single_snapshot(BlockDriverState *bs, int i)
{
diff --git a/block/qcow2.c b/block/qcow2.c
index c8115e1cba..b5c47931ef 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -38,6 +38,7 @@
#include "qemu/option_int.h"
#include "qemu/cutils.h"
#include "qemu/bswap.h"
+#include "qemu/memalign.h"
#include "qapi/qobject-input-visitor.h"
#include "qapi/qapi-visit-block-core.h"
#include "crypto.h"
diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c
index b548362398..caf2c024c2 100644
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -51,6 +51,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/memalign.h"
#include "trace.h"
#include "qed.h"
diff --git a/block/qed-table.c b/block/qed-table.c
index 405d446cbe..1cc844b1a5 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -17,6 +17,7 @@
#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
#include "qed.h"
#include "qemu/bswap.h"
+#include "qemu/memalign.h"
/* Called with table_lock held. */
static int coroutine_fn qed_read_table(BDRVQEDState *s, uint64_t offset,
diff --git a/block/qed.c b/block/qed.c
index 558d3646c4..f34d9a3ac1 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -20,6 +20,7 @@
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qemu/option.h"
+#include "qemu/memalign.h"
#include "trace.h"
#include "qed.h"
#include "sysemu/block-backend.h"
diff --git a/block/quorum.c b/block/quorum.c
index c28dda7baa..f33f30d36b 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -17,6 +17,7 @@
#include "qemu/cutils.h"
#include "qemu/module.h"
#include "qemu/option.h"
+#include "qemu/memalign.h"
#include "block/block_int.h"
#include "block/coroutines.h"
#include "block/qdict.h"
diff --git a/block/raw-format.c b/block/raw-format.c
index bda757fd19..69fd650eaf 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -31,6 +31,7 @@
#include "qapi/error.h"
#include "qemu/module.h"
#include "qemu/option.h"
+#include "qemu/memalign.h"
typedef struct BDRVRawState {
uint64_t offset;
diff --git a/block/reqlist.c b/block/reqlist.c
new file mode 100644
index 0000000000..08cb57cfa4
--- /dev/null
+++ b/block/reqlist.c
@@ -0,0 +1,85 @@
+/*
+ * reqlist API
+ *
+ * Copyright (C) 2013 Proxmox Server Solutions
+ * Copyright (c) 2021 Virtuozzo International GmbH.
+ *
+ * Authors:
+ * Dietmar Maurer (dietmar@proxmox.com)
+ * Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/range.h"
+
+#include "block/reqlist.h"
+
+void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset,
+ int64_t bytes)
+{
+ assert(!reqlist_find_conflict(reqs, offset, bytes));
+
+ *req = (BlockReq) {
+ .offset = offset,
+ .bytes = bytes,
+ };
+ qemu_co_queue_init(&req->wait_queue);
+ QLIST_INSERT_HEAD(reqs, req, list);
+}
+
+BlockReq *reqlist_find_conflict(BlockReqList *reqs, int64_t offset,
+ int64_t bytes)
+{
+ BlockReq *r;
+
+ QLIST_FOREACH(r, reqs, list) {
+ if (ranges_overlap(offset, bytes, r->offset, r->bytes)) {
+ return r;
+ }
+ }
+
+ return NULL;
+}
+
+bool coroutine_fn reqlist_wait_one(BlockReqList *reqs, int64_t offset,
+ int64_t bytes, CoMutex *lock)
+{
+ BlockReq *r = reqlist_find_conflict(reqs, offset, bytes);
+
+ if (!r) {
+ return false;
+ }
+
+ qemu_co_queue_wait(&r->wait_queue, lock);
+
+ return true;
+}
+
+void coroutine_fn reqlist_wait_all(BlockReqList *reqs, int64_t offset,
+ int64_t bytes, CoMutex *lock)
+{
+ while (reqlist_wait_one(reqs, offset, bytes, lock)) {
+ /* continue */
+ }
+}
+
+void coroutine_fn reqlist_shrink_req(BlockReq *req, int64_t new_bytes)
+{
+ if (new_bytes == req->bytes) {
+ return;
+ }
+
+ assert(new_bytes > 0 && new_bytes < req->bytes);
+
+ req->bytes = new_bytes;
+ qemu_co_queue_restart_all(&req->wait_queue);
+}
+
+void coroutine_fn reqlist_remove_req(BlockReq *req)
+{
+ QLIST_REMOVE(req, list);
+ qemu_co_queue_restart_all(&req->wait_queue);
+}
diff --git a/block/snapshot-access.c b/block/snapshot-access.c
new file mode 100644
index 0000000000..77b87c1946
--- /dev/null
+++ b/block/snapshot-access.c
@@ -0,0 +1,132 @@
+/*
+ * snapshot_access block driver
+ *
+ * Copyright (c) 2022 Virtuozzo International GmbH.
+ *
+ * Author:
+ * Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+
+#include "sysemu/block-backend.h"
+#include "qemu/cutils.h"
+#include "block/block_int.h"
+
+static coroutine_fn int
+snapshot_access_co_preadv_part(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
+{
+ if (flags) {
+ return -ENOTSUP;
+ }
+
+ return bdrv_co_preadv_snapshot(bs->file, offset, bytes, qiov, qiov_offset);
+}
+
+static int coroutine_fn
+snapshot_access_co_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t offset,
+ int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file)
+{
+ return bdrv_co_snapshot_block_status(bs->file->bs, want_zero, offset,
+ bytes, pnum, map, file);
+}
+
+static int coroutine_fn snapshot_access_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int64_t bytes)
+{
+ return bdrv_co_pdiscard_snapshot(bs->file->bs, offset, bytes);
+}
+
+static int coroutine_fn
+snapshot_access_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
+{
+ return -ENOTSUP;
+}
+
+static coroutine_fn int
+snapshot_access_co_pwritev_part(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
+{
+ return -ENOTSUP;
+}
+
+
+static void snapshot_access_refresh_filename(BlockDriverState *bs)
+{
+ pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
+ bs->file->bs->filename);
+}
+
+static int snapshot_access_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+ BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY,
+ false, errp);
+ if (!bs->file) {
+ return -EINVAL;
+ }
+
+ bs->total_sectors = bs->file->bs->total_sectors;
+
+ return 0;
+}
+
+static void snapshot_access_child_perm(BlockDriverState *bs, BdrvChild *c,
+ BdrvChildRole role,
+ BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ /*
+ * Currently, we don't need any permissions. If bs->file provides
+ * snapshot-access API, we can use it.
+ */
+ *nperm = 0;
+ *nshared = BLK_PERM_ALL;
+}
+
+BlockDriver bdrv_snapshot_access_drv = {
+ .format_name = "snapshot-access",
+
+ .bdrv_open = snapshot_access_open,
+
+ .bdrv_co_preadv_part = snapshot_access_co_preadv_part,
+ .bdrv_co_pwritev_part = snapshot_access_co_pwritev_part,
+ .bdrv_co_pwrite_zeroes = snapshot_access_co_pwrite_zeroes,
+ .bdrv_co_pdiscard = snapshot_access_co_pdiscard,
+ .bdrv_co_block_status = snapshot_access_co_block_status,
+
+ .bdrv_refresh_filename = snapshot_access_refresh_filename,
+
+ .bdrv_child_perm = snapshot_access_child_perm,
+};
+
+static void snapshot_access_init(void)
+{
+ bdrv_register(&bdrv_snapshot_access_drv);
+}
+
+block_init(snapshot_access_init);
diff --git a/block/snapshot.c b/block/snapshot.c
index ccacda8bd5..d6f53c3065 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -57,6 +57,8 @@ int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
QEMUSnapshotInfo *sn_tab, *sn;
int nb_sns, i, ret;
+ GLOBAL_STATE_CODE();
+
ret = -ENOENT;
nb_sns = bdrv_snapshot_list(bs, &sn_tab);
if (nb_sns < 0) {
@@ -105,6 +107,7 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
bool ret = false;
assert(id || name);
+ GLOBAL_STATE_CODE();
nb_sns = bdrv_snapshot_list(bs, &sn_tab);
if (nb_sns < 0) {
@@ -200,6 +203,7 @@ static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs)
int bdrv_can_snapshot(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
+ GLOBAL_STATE_CODE();
if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
return 0;
}
@@ -220,6 +224,9 @@ int bdrv_snapshot_create(BlockDriverState *bs,
{
BlockDriver *drv = bs->drv;
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
+
+ GLOBAL_STATE_CODE();
+
if (!drv) {
return -ENOMEDIUM;
}
@@ -240,6 +247,8 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
BdrvChild **fallback_ptr;
int ret, open_ret;
+ GLOBAL_STATE_CODE();
+
if (!drv) {
error_setg(errp, "Block driver is closed");
return -ENOMEDIUM;
@@ -348,6 +357,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
int ret;
+ GLOBAL_STATE_CODE();
+
if (!drv) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
@@ -380,6 +391,8 @@ int bdrv_snapshot_list(BlockDriverState *bs,
{
BlockDriver *drv = bs->drv;
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
+
+ GLOBAL_STATE_CODE();
if (!drv) {
return -ENOMEDIUM;
}
@@ -419,6 +432,8 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
{
BlockDriver *drv = bs->drv;
+ GLOBAL_STATE_CODE();
+
if (!drv) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
@@ -447,6 +462,8 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
int ret;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
ret = bdrv_snapshot_load_tmp(bs, id_or_name, NULL, &local_err);
if (ret == -ENOENT || ret == -EINVAL) {
error_free(local_err);
@@ -515,6 +532,8 @@ bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ GLOBAL_STATE_CODE();
+
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return false;
}
@@ -549,6 +568,8 @@ int bdrv_all_delete_snapshot(const char *name,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ GLOBAL_STATE_CODE();
+
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
}
@@ -588,6 +609,8 @@ int bdrv_all_goto_snapshot(const char *name,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ GLOBAL_STATE_CODE();
+
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
}
@@ -622,6 +645,8 @@ int bdrv_all_has_snapshot(const char *name,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ GLOBAL_STATE_CODE();
+
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
}
@@ -663,6 +688,7 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
{
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ GLOBAL_STATE_CODE();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
@@ -703,6 +729,8 @@ BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ GLOBAL_STATE_CODE();
+
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return NULL;
}
diff --git a/block/stream.c b/block/stream.c
index 7c6b173ddd..3acb59fe6a 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -220,6 +220,8 @@ void stream_start(const char *job_id, BlockDriverState *bs,
QDict *opts;
int ret;
+ GLOBAL_STATE_CODE();
+
assert(!(base && bottom));
assert(!(backing_file_str && bottom));
diff --git a/block/vdi.c b/block/vdi.c
index bdc58d726e..cca3a3a356 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -64,6 +64,7 @@
#include "qemu/coroutine.h"
#include "qemu/cutils.h"
#include "qemu/uuid.h"
+#include "qemu/memalign.h"
/* Code configuration options. */
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index 7672161d95..ff0d4e0da0 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -23,6 +23,7 @@
#include "block/block_int.h"
#include "qemu/error-report.h"
#include "qemu/bswap.h"
+#include "qemu/memalign.h"
#include "vhdx.h"
diff --git a/block/vhdx.c b/block/vhdx.c
index 356ec4c455..410c6f9610 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -25,6 +25,7 @@
#include "qemu/crc32c.h"
#include "qemu/bswap.h"
#include "qemu/error-report.h"
+#include "qemu/memalign.h"
#include "vhdx.h"
#include "migration/blocker.h"
#include "qemu/uuid.h"
diff --git a/block/vmdk.c b/block/vmdk.c
index 0dfab6e941..37c0946066 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -33,6 +33,7 @@
#include "qemu/module.h"
#include "qemu/option.h"
#include "qemu/bswap.h"
+#include "qemu/memalign.h"
#include "migration/blocker.h"
#include "qemu/cutils.h"
#include <zlib.h>
diff --git a/block/vpc.c b/block/vpc.c
index 297a26262a..4d8f16e199 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -33,6 +33,7 @@
#include "migration/blocker.h"
#include "qemu/bswap.h"
#include "qemu/uuid.h"
+#include "qemu/memalign.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qobject-input-visitor.h"
#include "qapi/qapi-visit-block-core.h"
diff --git a/block/win32-aio.c b/block/win32-aio.c
index c57e10c997..aadc7b1bc3 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -29,6 +29,7 @@
#include "block/raw-aio.h"
#include "qemu/event_notifier.h"
#include "qemu/iov.h"
+#include "qemu/memalign.h"
#include <windows.h>
#include <winioctl.h>
diff --git a/blockdev.c b/blockdev.c
index 42e098b458..e46e831212 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -63,11 +63,13 @@
#include "qemu/main-loop.h"
#include "qemu/throttle-options.h"
+/* Protected by BQL */
QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
void bdrv_set_monitor_owned(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
}
@@ -111,6 +113,8 @@ void override_max_devs(BlockInterfaceType type, int max_devs)
BlockBackend *blk;
DriveInfo *dinfo;
+ GLOBAL_STATE_CODE();
+
if (max_devs <= 0) {
return;
}
@@ -140,6 +144,8 @@ void blockdev_mark_auto_del(BlockBackend *blk)
DriveInfo *dinfo = blk_legacy_dinfo(blk);
BlockJob *job;
+ GLOBAL_STATE_CODE();
+
if (!dinfo) {
return;
}
@@ -161,6 +167,7 @@ void blockdev_mark_auto_del(BlockBackend *blk)
void blockdev_auto_del(BlockBackend *blk)
{
DriveInfo *dinfo = blk_legacy_dinfo(blk);
+ GLOBAL_STATE_CODE();
if (dinfo && dinfo->auto_del) {
monitor_remove_blk(blk);
@@ -185,6 +192,8 @@ QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
{
QemuOpts *opts;
+ GLOBAL_STATE_CODE();
+
opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
if (!opts) {
return NULL;
@@ -205,6 +214,8 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
BlockBackend *blk;
DriveInfo *dinfo;
+ GLOBAL_STATE_CODE();
+
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
if (dinfo && dinfo->type == type
@@ -227,6 +238,8 @@ void drive_check_orphaned(void)
Location loc;
bool orphans = false;
+ GLOBAL_STATE_CODE();
+
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
/*
@@ -260,6 +273,7 @@ void drive_check_orphaned(void)
DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
{
+ GLOBAL_STATE_CODE();
return drive_get(type,
drive_index_to_bus_id(type, index),
drive_index_to_unit_id(type, index));
@@ -271,6 +285,8 @@ int drive_get_max_bus(BlockInterfaceType type)
BlockBackend *blk;
DriveInfo *dinfo;
+ GLOBAL_STATE_CODE();
+
max_bus = -1;
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
@@ -628,6 +644,7 @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
{
int bdrv_flags = 0;
+ GLOBAL_STATE_CODE();
/* bdrv_open() defaults to the values in bdrv_flags (for compatibility
* with other callers) rather than what we want as the real defaults.
* Apply the defaults here instead. */
@@ -646,6 +663,7 @@ void blockdev_close_all_bdrv_states(void)
{
BlockDriverState *bs, *next_bs;
+ GLOBAL_STATE_CODE();
QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) {
AioContext *ctx = bdrv_get_aio_context(bs);
@@ -658,6 +676,7 @@ void blockdev_close_all_bdrv_states(void)
/* Iterates over the list of monitor-owned BlockDriverStates */
BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
return bs ? QTAILQ_NEXT(bs, monitor_list)
: QTAILQ_FIRST(&monitor_bdrv_states);
}
@@ -754,6 +773,8 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
const char *filename;
int i;
+ GLOBAL_STATE_CODE();
+
/* Change legacy command line options into QMP ones */
static const struct {
const char *from;
@@ -1174,6 +1195,8 @@ typedef struct BlkActionState BlkActionState;
*
* Only prepare() may fail. In a single transaction, only one of commit() or
* abort() will be called. clean() will always be called if it is present.
+ *
+ * Always run under BQL.
*/
typedef struct BlkActionOps {
size_t instance_size;
@@ -2283,6 +2306,8 @@ static TransactionProperties *get_transaction_properties(
/*
* 'Atomic' group operations. The operations are performed as a set, and if
* any fail then we roll back all operations in the group.
+ *
+ * Always run under BQL.
*/
void qmp_transaction(TransactionActionList *dev_list,
bool has_props,
@@ -2294,6 +2319,8 @@ void qmp_transaction(TransactionActionList *dev_list,
BlkActionState *state, *next;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
QTAILQ_HEAD(, BlkActionState) snap_bdrv_states;
QTAILQ_INIT(&snap_bdrv_states);
@@ -3596,6 +3623,8 @@ void qmp_blockdev_del(const char *node_name, Error **errp)
AioContext *aio_context;
BlockDriverState *bs;
+ GLOBAL_STATE_CODE();
+
bs = bdrv_find_node(node_name);
if (!bs) {
error_setg(errp, "Failed to find node with node-name='%s'", node_name);
diff --git a/blockjob.c b/blockjob.c
index 10815a89fe..4868453d74 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -62,6 +62,7 @@ static bool is_block_job(Job *job)
BlockJob *block_job_next(BlockJob *bjob)
{
Job *job = bjob ? &bjob->job : NULL;
+ GLOBAL_STATE_CODE();
do {
job = job_next(job);
@@ -73,6 +74,7 @@ BlockJob *block_job_next(BlockJob *bjob)
BlockJob *block_job_get(const char *id)
{
Job *job = job_get(id);
+ GLOBAL_STATE_CODE();
if (job && is_block_job(job)) {
return container_of(job, BlockJob, job);
@@ -84,6 +86,7 @@ BlockJob *block_job_get(const char *id)
void block_job_free(Job *job)
{
BlockJob *bjob = container_of(job, BlockJob, job);
+ GLOBAL_STATE_CODE();
block_job_remove_all_bdrv(bjob);
ratelimit_destroy(&bjob->limit);
@@ -183,6 +186,7 @@ static const BdrvChildClass child_job = {
void block_job_remove_all_bdrv(BlockJob *job)
{
+ GLOBAL_STATE_CODE();
/*
* bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(),
* which will also traverse job->nodes, so consume the list one by
@@ -205,6 +209,7 @@ void block_job_remove_all_bdrv(BlockJob *job)
bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs)
{
GSList *el;
+ GLOBAL_STATE_CODE();
for (el = job->nodes; el; el = el->next) {
BdrvChild *c = el->data;
@@ -221,6 +226,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
{
BdrvChild *c;
bool need_context_ops;
+ GLOBAL_STATE_CODE();
bdrv_ref(bs);
@@ -270,6 +276,8 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
const BlockJobDriver *drv = block_job_driver(job);
int64_t old_speed = job->speed;
+ GLOBAL_STATE_CODE();
+
if (job_apply_verb(&job->job, JOB_VERB_SET_SPEED, errp) < 0) {
return false;
}
@@ -299,6 +307,7 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n)
{
+ IO_CODE();
return ratelimit_calculate_delay(&job->limit, n);
}
@@ -307,6 +316,8 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
BlockJobInfo *info;
uint64_t progress_current, progress_total;
+ GLOBAL_STATE_CODE();
+
if (block_job_is_internal(job)) {
error_setg(errp, "Cannot query QEMU internal jobs");
return NULL;
@@ -434,6 +445,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
{
BlockJob *job;
int ret;
+ GLOBAL_STATE_CODE();
if (job_id == NULL && !(flags & JOB_INTERNAL)) {
job_id = bdrv_get_device_name(bs);
@@ -488,6 +500,7 @@ fail:
void block_job_iostatus_reset(BlockJob *job)
{
+ GLOBAL_STATE_CODE();
if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
return;
}
@@ -498,6 +511,7 @@ void block_job_iostatus_reset(BlockJob *job)
void block_job_user_resume(Job *job)
{
BlockJob *bjob = container_of(job, BlockJob, job);
+ GLOBAL_STATE_CODE();
block_job_iostatus_reset(bjob);
}
@@ -505,6 +519,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
int is_read, int error)
{
BlockErrorAction action;
+ IO_CODE();
switch (on_err) {
case BLOCKDEV_ON_ERROR_ENOSPC:
@@ -543,5 +558,6 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
AioContext *block_job_get_aio_context(BlockJob *job)
{
+ GLOBAL_STATE_CODE();
return job->job.aio_context;
}
diff --git a/configure b/configure
index daccf4be7c..886000346a 100755
--- a/configure
+++ b/configure
@@ -630,10 +630,10 @@ case "$cpu" in
ppc)
CPU_CFLAGS="-m32" ;;
ppc64)
- CPU_CFLAGS="-m64 -mbig" ;;
+ CPU_CFLAGS="-m64 -mbig-endian" ;;
ppc64le)
cpu="ppc64"
- CPU_CFLAGS="-m64 -mlittle" ;;
+ CPU_CFLAGS="-m64 -mlittle-endian" ;;
s390)
CPU_CFLAGS="-m31" ;;
diff --git a/cpu.c b/cpu.c
index 3ea38aea70..d564886149 100644
--- a/cpu.c
+++ b/cpu.c
@@ -35,10 +35,12 @@
#include "sysemu/tcg.h"
#include "sysemu/kvm.h"
#include "sysemu/replay.h"
+#include "exec/exec-all.h"
#include "exec/translate-all.h"
#include "exec/log.h"
#include "hw/core/accel-cpu.h"
#include "trace/trace-root.h"
+#include "qemu/accel.h"
uintptr_t qemu_host_page_size;
intptr_t qemu_host_page_mask;
@@ -415,11 +417,11 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...)
/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
-int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
- void *ptr, target_ulong len, bool is_write)
+int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
+ void *ptr, size_t len, bool is_write)
{
int flags;
- target_ulong l, page;
+ vaddr l, page;
void * p;
uint8_t *buf = ptr;
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
index 32863b4aa5..e10684bf53 100644
--- a/docs/specs/index.rst
+++ b/docs/specs/index.rst
@@ -19,3 +19,4 @@ guest hardware that is specific to QEMU.
acpi_pci_hotplug
acpi_nvdimm
acpi_erst
+ sev-guest-firmware
diff --git a/docs/specs/sev-guest-firmware.rst b/docs/specs/sev-guest-firmware.rst
new file mode 100644
index 0000000000..3f7f082df5
--- /dev/null
+++ b/docs/specs/sev-guest-firmware.rst
@@ -0,0 +1,125 @@
+====================================================
+QEMU/Guest Firmware Interface for AMD SEV and SEV-ES
+====================================================
+
+Overview
+========
+
+The guest firmware image (OVMF) may contain some configuration entries
+which are used by QEMU before the guest launches. These are listed in a
+GUIDed table at a known location in the firmware image. QEMU parses
+this table when it loads the firmware image into memory, and then QEMU
+reads individual entries when their values are needed.
+
+Though nothing in the table structure is SEV-specific, currently all the
+entries in the table are related to SEV and SEV-ES features.
+
+
+Table parsing in QEMU
+---------------------
+
+The table is parsed from the footer: first the presence of the table
+footer GUID (96b582de-1fb2-45f7-baea-a366c55a082d) at 0xffffffd0 is
+verified. If it is found, the two bytes at 0xffffffce hold the length
+of the entire table.
+
+Then the table is scanned backwards looking for the specific entry GUID.
+
+QEMU files related to parsing and scanning the OVMF table:
+ - ``hw/i386/pc_sysfw_ovmf.c``
+
+The edk2 firmware code that constructs this structure is in the
+`OVMF Reset Vector file`_.
+
+
+Table memory layout
+-------------------
+
++------------+--------+-----------------------------------------+
+| GPA | Length | Description |
++============+========+=========================================+
+| 0xffffff80 | 4 | Zero padding |
++------------+--------+-----------------------------------------+
+| 0xffffff84 | 4 | SEV hashes table base address |
++------------+--------+-----------------------------------------+
+| 0xffffff88 | 4 | SEV hashes table size (=0x400) |
++------------+--------+-----------------------------------------+
+| 0xffffff8c | 2 | SEV hashes table entry length (=0x1a) |
++------------+--------+-----------------------------------------+
+| 0xffffff8e | 16 | SEV hashes table GUID: |
+| | | 7255371f-3a3b-4b04-927b-1da6efa8d454 |
++------------+--------+-----------------------------------------+
+| 0xffffff9e | 4 | SEV secret block base address |
++------------+--------+-----------------------------------------+
+| 0xffffffa2 | 4 | SEV secret block size (=0xc00) |
++------------+--------+-----------------------------------------+
+| 0xffffffa6 | 2 | SEV secret block entry length (=0x1a) |
++------------+--------+-----------------------------------------+
+| 0xffffffa8 | 16 | SEV secret block GUID: |
+| | | 4c2eb361-7d9b-4cc3-8081-127c90d3d294 |
++------------+--------+-----------------------------------------+
+| 0xffffffb8 | 4 | SEV-ES AP reset RIP |
++------------+--------+-----------------------------------------+
+| 0xffffffbc | 2 | SEV-ES reset block entry length (=0x16) |
++------------+--------+-----------------------------------------+
+| 0xffffffbe | 16 | SEV-ES reset block entry GUID: |
+| | | 00f771de-1a7e-4fcb-890e-68c77e2fb44e |
++------------+--------+-----------------------------------------+
+| 0xffffffce | 2 | Length of entire table including table |
+| | | footer GUID and length (=0x72) |
++------------+--------+-----------------------------------------+
+| 0xffffffd0 | 16 | OVMF GUIDed table footer GUID: |
+| | | 96b582de-1fb2-45f7-baea-a366c55a082d |
++------------+--------+-----------------------------------------+
+| 0xffffffe0 | 8 | Application processor entry point code |
++------------+--------+-----------------------------------------+
+| 0xffffffe8 | 8 | "\0\0\0\0VTF\0" |
++------------+--------+-----------------------------------------+
+| 0xfffffff0 | 16 | Reset vector code |
++------------+--------+-----------------------------------------+
+
+
+Table entries description
+=========================
+
+SEV-ES reset block
+------------------
+
+Entry GUID: 00f771de-1a7e-4fcb-890e-68c77e2fb44e
+
+For the initial boot of an AP under SEV-ES, the "reset" RIP must be
+programmed to the RAM area defined by this entry. The entry's format
+is:
+
+* IP value [15:0]
+* CS segment base [31:16]
+
+A hypervisor reads the CS segment base and IP value. The CS segment
+base value represents the high order 16-bits of the CS segment base, so
+the hypervisor must left shift the value of the CS segment base by 16
+bits to form the full CS segment base for the CS segment register. It
+would then program the EIP register with the IP value as read.
+
+
+SEV secret block
+----------------
+
+Entry GUID: 4c2eb361-7d9b-4cc3-8081-127c90d3d294
+
+This describes the guest RAM area where the hypervisor should inject the
+Guest Owner secret (using SEV_LAUNCH_SECRET).
+
+
+SEV hashes table
+----------------
+
+Entry GUID: 7255371f-3a3b-4b04-927b-1da6efa8d454
+
+This describes the guest RAM area where the hypervisor should install a
+table describing the hashes of certain firmware configuration device
+files that would otherwise be passed in unchecked. The current use is
+for the kernel, initrd and command line values, but others may be added.
+
+
+.. _OVMF Reset Vector file:
+ https://github.com/tianocore/edk2/blob/master/OvmfPkg/ResetVector/Ia16/ResetVectorVtf0.asm
diff --git a/docs/system/riscv/virt.rst b/docs/system/riscv/virt.rst
index 08ce3c4177..1272b6659e 100644
--- a/docs/system/riscv/virt.rst
+++ b/docs/system/riscv/virt.rst
@@ -63,6 +63,22 @@ The following machine-specific options are supported:
When this option is "on", ACLINT devices will be emulated instead of
SiFive CLINT. When not specified, this option is assumed to be "off".
+- aia=[none|aplic|aplic-imsic]
+
+ This option selects the interrupt controller defined by the AIA
+ (advanced interrupt architecture) specification. "aia=aplic" selects the
+ APLIC (advanced platform level interrupt controller) to handle wired
+ interrupts, whereas "aia=aplic-imsic" selects the APLIC and IMSIC (incoming
+ message signaled interrupt controller) to handle both wired interrupts and
+ MSIs. When not specified, this option is assumed to be "none", which selects
+ the SiFive PLIC to handle wired interrupts.
+
+- aia-guests=nnn
+
+ The number of per-HART VS-level AIA IMSIC pages to be emulated for a guest
+ having AIA IMSIC (i.e. "aia=aplic-imsic" selected). When not specified,
+ the default number of per-HART VS-level AIA IMSIC pages is 0.
+
Running Linux kernel
--------------------
diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst
index 878e6a5c5c..8b97592663 100644
--- a/docs/tools/qemu-storage-daemon.rst
+++ b/docs/tools/qemu-storage-daemon.rst
@@ -154,6 +154,13 @@ Standard options:
created but before accepting connections. The daemon has started successfully
when the pid file is written and clients may begin connecting.
+.. option:: --daemonize
+
+ Daemonize the process. The parent process will exit once startup is complete
+ (i.e., after the pid file has been or would have been written) or failure
+ occurs. Its exit code reflects whether the child has started up successfully
+ or failed to do so.
+
Examples
--------
Launch the daemon with QMP monitor socket ``qmp.sock`` so clients can execute
diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
index 8fd89f0447..4997677460 100644
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@@ -16,10 +16,17 @@
#include <dirent.h>
#include <utime.h>
-#include <sys/vfs.h>
#include "qemu-fsdev-throttle.h"
#include "p9array.h"
+#ifdef CONFIG_LINUX
+# include <sys/vfs.h>
+#endif
+#ifdef CONFIG_DARWIN
+# include <sys/param.h>
+# include <sys/mount.h>
+#endif
+
#define SM_LOCAL_MODE_BITS 0600
#define SM_LOCAL_DIR_MODE_BITS 0700
diff --git a/fsdev/meson.build b/fsdev/meson.build
index adf57cc43e..b632b66348 100644
--- a/fsdev/meson.build
+++ b/fsdev/meson.build
@@ -7,6 +7,7 @@ fsdev_ss.add(when: ['CONFIG_FSDEV_9P'], if_true: files(
'qemu-fsdev.c',
), if_false: files('qemu-fsdev-dummy.c'))
softmmu_ss.add_all(when: 'CONFIG_LINUX', if_true: fsdev_ss)
+softmmu_ss.add_all(when: 'CONFIG_DARWIN', if_true: fsdev_ss)
if have_virtfs_proxy_helper
executable('virtfs-proxy-helper',
diff --git a/fsdev/p9array.h b/fsdev/p9array.h
index 6aa25327ca..90e83a7c7b 100644
--- a/fsdev/p9array.h
+++ b/fsdev/p9array.h
@@ -81,11 +81,11 @@
*/
/**
- * Declares an array type for the passed @a scalar_type.
+ * P9ARRAY_DECLARE_TYPE() - Declares an array type for the passed @scalar_type.
*
- * This is typically used from a shared header file.
+ * @scalar_type: type of the individual array elements
*
- * @param scalar_type - type of the individual array elements
+ * This is typically used from a shared header file.
*/
#define P9ARRAY_DECLARE_TYPE(scalar_type) \
typedef struct P9Array##scalar_type { \
@@ -97,14 +97,14 @@
void p9array_auto_free_##scalar_type(scalar_type **auto_var); \
/**
- * Defines an array type for the passed @a scalar_type and appropriate
- * @a scalar_cleanup_func.
+ * P9ARRAY_DEFINE_TYPE() - Defines an array type for the passed @scalar_type
+ * and appropriate @scalar_cleanup_func.
*
- * This is typically used from a C unit file.
+ * @scalar_type: type of the individual array elements
+ * @scalar_cleanup_func: appropriate function to free memory dynamically
+ * allocated by individual array elements before
*
- * @param scalar_type - type of the individual array elements
- * @param scalar_cleanup_func - appropriate function to free memory dynamically
- * allocated by individual array elements before
+ * This is typically used from a C unit file.
*/
#define P9ARRAY_DEFINE_TYPE(scalar_type, scalar_cleanup_func) \
void p9array_new_##scalar_type(scalar_type **auto_var, size_t len) \
@@ -132,23 +132,27 @@
} \
/**
+ * P9ARRAY_REF() - Declare a reference variable for an array.
+ *
+ * @scalar_type: type of the individual array elements
+ *
* Used to declare a reference variable (unique pointer) for an array. After
* leaving the scope of the reference variable, the associated array is
* automatically freed.
- *
- * @param scalar_type - type of the individual array elements
*/
#define P9ARRAY_REF(scalar_type) \
__attribute((__cleanup__(p9array_auto_free_##scalar_type))) scalar_type*
/**
- * Allocates a new array of passed @a scalar_type with @a len number of array
- * elements and assigns the created array to the reference variable
- * @a auto_var.
+ * P9ARRAY_NEW() - Allocate a new array.
*
- * @param scalar_type - type of the individual array elements
- * @param auto_var - destination reference variable
- * @param len - amount of array elements to be allocated immediately
+ * @scalar_type: type of the individual array elements
+ * @auto_var: destination reference variable
+ * @len: amount of array elements to be allocated immediately
+ *
+ * Allocates a new array of passed @scalar_type with @len number of array
+ * elements and assigns the created array to the reference variable
+ * @auto_var.
*/
#define P9ARRAY_NEW(scalar_type, auto_var, len) \
QEMU_BUILD_BUG_MSG( \
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 210d9e7705..d42ce6d8b8 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -32,10 +32,12 @@
#include "qemu/error-report.h"
#include "qemu/option.h"
#include <libgen.h>
+#ifdef CONFIG_LINUX
#include <linux/fs.h>
#ifdef CONFIG_LINUX_MAGIC_H
#include <linux/magic.h>
#endif
+#endif
#include <sys/ioctl.h>
#ifndef XFS_SUPER_MAGIC
@@ -560,6 +562,15 @@ again:
if (!entry) {
return NULL;
}
+#ifdef CONFIG_DARWIN
+ int off;
+ off = telldir(fs->dir.stream);
+ /* If telldir fails, fail the entire readdir call */
+ if (off < 0) {
+ return NULL;
+ }
+ entry->d_seekoff = off;
+#endif
if (ctx->export_flags & V9FS_SM_MAPPED) {
entry->d_type = DT_UNKNOWN;
@@ -671,7 +682,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path,
if (fs_ctx->export_flags & V9FS_SM_MAPPED ||
fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
- err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
+ err = qemu_mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0);
if (err == -1) {
goto out;
}
@@ -686,7 +697,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path,
}
} else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH ||
fs_ctx->export_flags & V9FS_SM_NONE) {
- err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
+ err = qemu_mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev);
if (err == -1) {
goto out;
}
@@ -779,16 +790,20 @@ static int local_fstat(FsContext *fs_ctx, int fid_type,
mode_t tmp_mode;
dev_t tmp_dev;
- if (fgetxattr(fd, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) {
+ if (qemu_fgetxattr(fd, "user.virtfs.uid",
+ &tmp_uid, sizeof(uid_t)) > 0) {
stbuf->st_uid = le32_to_cpu(tmp_uid);
}
- if (fgetxattr(fd, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) {
+ if (qemu_fgetxattr(fd, "user.virtfs.gid",
+ &tmp_gid, sizeof(gid_t)) > 0) {
stbuf->st_gid = le32_to_cpu(tmp_gid);
}
- if (fgetxattr(fd, "user.virtfs.mode", &tmp_mode, sizeof(mode_t)) > 0) {
+ if (qemu_fgetxattr(fd, "user.virtfs.mode",
+ &tmp_mode, sizeof(mode_t)) > 0) {
stbuf->st_mode = le32_to_cpu(tmp_mode);
}
- if (fgetxattr(fd, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) {
+ if (qemu_fgetxattr(fd, "user.virtfs.rdev",
+ &tmp_dev, sizeof(dev_t)) > 0) {
stbuf->st_rdev = le64_to_cpu(tmp_dev);
}
} else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) {
diff --git a/hw/9pfs/9p-proxy.c b/hw/9pfs/9p-proxy.c
index 09bd9f1464..8b4b5cf7dc 100644
--- a/hw/9pfs/9p-proxy.c
+++ b/hw/9pfs/9p-proxy.c
@@ -123,10 +123,16 @@ static void prstatfs_to_statfs(struct statfs *stfs, ProxyStatFS *prstfs)
stfs->f_bavail = prstfs->f_bavail;
stfs->f_files = prstfs->f_files;
stfs->f_ffree = prstfs->f_ffree;
+#ifdef CONFIG_DARWIN
+ /* f_namelen and f_frsize do not exist on Darwin */
+ stfs->f_fsid.val[0] = prstfs->f_fsid[0] & 0xFFFFFFFFU;
+ stfs->f_fsid.val[1] = prstfs->f_fsid[1] >> 32 & 0xFFFFFFFFU;
+#else
stfs->f_fsid.__val[0] = prstfs->f_fsid[0] & 0xFFFFFFFFU;
stfs->f_fsid.__val[1] = prstfs->f_fsid[1] >> 32 & 0xFFFFFFFFU;
stfs->f_namelen = prstfs->f_namelen;
stfs->f_frsize = prstfs->f_frsize;
+#endif
}
/* Converts proxy_stat structure to VFS stat structure */
@@ -143,12 +149,24 @@ static void prstat_to_stat(struct stat *stbuf, ProxyStat *prstat)
stbuf->st_size = prstat->st_size;
stbuf->st_blksize = prstat->st_blksize;
stbuf->st_blocks = prstat->st_blocks;
+ stbuf->st_atime = prstat->st_atim_sec;
+ stbuf->st_mtime = prstat->st_mtim_sec;
+ stbuf->st_ctime = prstat->st_ctim_sec;
+#ifdef CONFIG_DARWIN
+ stbuf->st_atimespec.tv_sec = prstat->st_atim_sec;
+ stbuf->st_mtimespec.tv_sec = prstat->st_mtim_sec;
+ stbuf->st_ctimespec.tv_sec = prstat->st_ctim_sec;
+ stbuf->st_atimespec.tv_nsec = prstat->st_atim_nsec;
+ stbuf->st_mtimespec.tv_nsec = prstat->st_mtim_nsec;
+ stbuf->st_ctimespec.tv_nsec = prstat->st_ctim_nsec;
+#else
stbuf->st_atim.tv_sec = prstat->st_atim_sec;
+ stbuf->st_mtim.tv_sec = prstat->st_mtim_sec;
+ stbuf->st_ctim.tv_sec = prstat->st_ctim_sec;
stbuf->st_atim.tv_nsec = prstat->st_atim_nsec;
- stbuf->st_mtime = prstat->st_mtim_sec;
stbuf->st_mtim.tv_nsec = prstat->st_mtim_nsec;
- stbuf->st_ctime = prstat->st_ctim_sec;
stbuf->st_ctim.tv_nsec = prstat->st_ctim_nsec;
+#endif
}
/*
@@ -688,7 +706,21 @@ static off_t proxy_telldir(FsContext *ctx, V9fsFidOpenState *fs)
static struct dirent *proxy_readdir(FsContext *ctx, V9fsFidOpenState *fs)
{
- return readdir(fs->dir.stream);
+ struct dirent *entry;
+ entry = readdir(fs->dir.stream);
+#ifdef CONFIG_DARWIN
+ if (!entry) {
+ return NULL;
+ }
+ int td;
+ td = telldir(fs->dir.stream);
+ /* If telldir fails, fail the entire readdir call */
+ if (td < 0) {
+ return NULL;
+ }
+ entry->d_seekoff = td;
+#endif
+ return entry;
}
static void proxy_seekdir(FsContext *ctx, V9fsFidOpenState *fs, off_t off)
diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c
index 7a7cd5c5ba..b3080e415b 100644
--- a/hw/9pfs/9p-synth.c
+++ b/hw/9pfs/9p-synth.c
@@ -234,7 +234,11 @@ static void synth_direntry(V9fsSynthNode *node,
offsetof(struct dirent, d_name) + sz);
memcpy(entry->d_name, node->name, sz);
entry->d_ino = node->attr->inode;
+#ifdef CONFIG_DARWIN
+ entry->d_seekoff = off + 1;
+#else
entry->d_off = off + 1;
+#endif
}
static struct dirent *synth_get_dentry(V9fsSynthNode *dir,
@@ -439,7 +443,9 @@ static int synth_statfs(FsContext *s, V9fsPath *fs_path,
stbuf->f_bsize = 512;
stbuf->f_blocks = 0;
stbuf->f_files = synth_node_count;
+#ifndef CONFIG_DARWIN
stbuf->f_namelen = NAME_MAX;
+#endif
return 0;
}
diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c
new file mode 100644
index 0000000000..bec0253474
--- /dev/null
+++ b/hw/9pfs/9p-util-darwin.c
@@ -0,0 +1,97 @@
+/*
+ * 9p utilities (Darwin Implementation)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/xattr.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "9p-util.h"
+
+/*
+ * Read extended attribute @name of @filename (looked up relative to @dirfd),
+ * opening the file with O_NOFOLLOW so a trailing symlink is not followed.
+ *
+ * Returns the result of fgetxattr(), or -1 if the file could not be opened.
+ */
+ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+                             void *value, size_t size)
+{
+    /* ssize_t rather than int: matches fgetxattr() and our own return type */
+    ssize_t ret;
+    int fd = openat_file(dirfd, filename,
+                         O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+    if (fd == -1) {
+        return -1;
+    }
+    ret = fgetxattr(fd, name, value, size, 0, 0);
+    close_preserve_errno(fd);
+    return ret;
+}
+
+/*
+ * List the extended attribute names of @filename (looked up relative to
+ * @dirfd) into @list, opening the file with O_NOFOLLOW so a trailing
+ * symlink is not followed.
+ *
+ * Returns the result of flistxattr(), or -1 if the file could not be opened.
+ */
+ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
+                              char *list, size_t size)
+{
+    /* ssize_t rather than int: matches flistxattr() and our own return type */
+    ssize_t ret;
+    int fd = openat_file(dirfd, filename,
+                         O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+    if (fd == -1) {
+        return -1;
+    }
+    ret = flistxattr(fd, list, size, 0);
+    close_preserve_errno(fd);
+    return ret;
+}
+
+/*
+ * Remove extended attribute @name from @filename (looked up relative to
+ * @dirfd); the file is opened with O_NOFOLLOW.
+ *
+ * Returns the result of fremovexattr(), or -1 if the file could not be
+ * opened.
+ */
+ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
+                                const char *name)
+{
+    int rc = -1;
+    int fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+
+    if (fd != -1) {
+        rc = fremovexattr(fd, name, 0);
+        close_preserve_errno(fd);
+    }
+    return rc;
+}
+
+/*
+ * Set extended attribute @name of @filename (looked up relative to @dirfd)
+ * to the @size bytes at @value; the file is opened with O_NOFOLLOW.
+ *
+ * Returns the result of fsetxattr(), or -1 if the file could not be opened.
+ */
+int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+                         void *value, size_t size, int flags)
+{
+    int rc = -1;
+    int fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+
+    if (fd != -1) {
+        rc = fsetxattr(fd, name, value, size, 0, flags);
+        close_preserve_errno(fd);
+    }
+    return rc;
+}
+
+/*
+ * As long as mknodat is not available on macOS, this workaround
+ * using pthread_fchdir_np is needed.
+ *
+ * Radar filed with Apple for implementing mknodat:
+ * rdar://FB9862426 (https://openradar.appspot.com/FB9862426)
+ */
+#if defined CONFIG_PTHREAD_FCHDIR_NP
+
+/*
+ * Stand-in for mknodat(): create node @filename relative to @dirfd by
+ * temporarily pointing this thread's working directory at @dirfd and
+ * calling mknod().
+ *
+ * Returns the result of mknod(), or -1 with errno set if the directory
+ * switch fails. errno is ENOTSUP when pthread_fchdir_np() is not available
+ * at run time.
+ */
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
+{
+    int preserved_errno, err;
+    if (!pthread_fchdir_np) {
+        error_report_once("pthread_fchdir_np() not available on this version of macOS");
+        /*
+         * Fail the same way as every other path (-1 plus errno): callers
+         * compare the result against -1 and would miss a bare -ENOTSUP.
+         */
+        errno = ENOTSUP;
+        return -1;
+    }
+    if (pthread_fchdir_np(dirfd) < 0) {
+        return -1;
+    }
+    err = mknod(filename, mode, dev);
+    /* Save errno now; the pthread_fchdir_np() call below may overwrite it */
+    preserved_errno = errno;
+    /* Stop using the thread-local cwd */
+    pthread_fchdir_np(-1);
+    if (err < 0) {
+        errno = preserved_errno;
+    }
+    return err;
+}
+
+#endif
diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util-linux.c
index 3221d9b498..db451b0784 100644
--- a/hw/9pfs/9p-util.c
+++ b/hw/9pfs/9p-util-linux.c
@@ -1,5 +1,5 @@
/*
- * 9p utilities
+ * 9p utilities (Linux Implementation)
*
* Copyright IBM, Corp. 2017
*
@@ -61,4 +61,10 @@ int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
ret = lsetxattr(proc_path, name, value, size, flags);
g_free(proc_path);
return ret;
+
+}
+
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
+{
+ return mknodat(dirfd, filename, mode, dev);
}
diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
index 546f46dc7d..97e681e167 100644
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -19,6 +19,23 @@
#define O_PATH_9P_UTIL 0
#endif
+#ifdef CONFIG_DARWIN
+#define qemu_fgetxattr(...) fgetxattr(__VA_ARGS__, 0, 0)
+#define qemu_lgetxattr(...) getxattr(__VA_ARGS__, 0, XATTR_NOFOLLOW)
+#define qemu_llistxattr(...) listxattr(__VA_ARGS__, XATTR_NOFOLLOW)
+#define qemu_lremovexattr(...) removexattr(__VA_ARGS__, XATTR_NOFOLLOW)
+static inline int qemu_lsetxattr(const char *path, const char *name,
+ const void *value, size_t size, int flags) {
+ return setxattr(path, name, value, size, 0, flags | XATTR_NOFOLLOW);
+}
+#else
+#define qemu_fgetxattr fgetxattr
+#define qemu_lgetxattr lgetxattr
+#define qemu_llistxattr llistxattr
+#define qemu_lremovexattr lremovexattr
+#define qemu_lsetxattr lsetxattr
+#endif
+
static inline void close_preserve_errno(int fd)
{
int serrno = errno;
@@ -37,10 +54,13 @@ static inline int openat_file(int dirfd, const char *name, int flags,
{
int fd, serrno, ret;
+#ifndef CONFIG_DARWIN
again:
+#endif
fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK,
mode);
if (fd == -1) {
+#ifndef CONFIG_DARWIN
if (errno == EPERM && (flags & O_NOATIME)) {
/*
* The client passed O_NOATIME but we lack permissions to honor it.
@@ -53,6 +73,7 @@ again:
flags &= ~O_NOATIME;
goto again;
}
+#endif
return -1;
}
@@ -78,4 +99,61 @@ ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
const char *name);
+/*
+ * Darwin has d_seekoff, which appears to function similarly to d_off.
+ * However, it does not appear to be supported on all file systems,
+ * so ensure it is manually injected earlier and call here when
+ * needed.
+ */
+/* Return the directory offset stored in @dent (d_seekoff on Darwin, else d_off). */
+static inline off_t qemu_dirent_off(struct dirent *dent)
+{
+#ifndef CONFIG_DARWIN
+    return dent->d_off;
+#else
+    return dent->d_seekoff;
+#endif
+}
+
+/**
+ * qemu_dirent_dup() - Create a heap copy of directory entry @dent.
+ *
+ * @dent: directory entry to copy
+ * Return: newly allocated copy, to be released with g_free()
+ *
+ * Prefer this helper over hand-written copies of dirent objects: the
+ * real size of a struct dirent can be larger or smaller than
+ * sizeof(struct dirent), and handling that correctly is platform
+ * specific (see gitlab issue #841).
+ */
+static inline struct dirent *qemu_dirent_dup(struct dirent *dent)
+{
+#if defined _DIRENT_HAVE_D_RECLEN
+    /* Where the platform offers d_reclen, it saves a strlen() call. */
+    size_t len = dent->d_reclen;
+#else
+    size_t len = 0;
+#endif
+
+    /*
+     * A zero length means either no d_reclen support or a driver that
+     * left d_reclen at zero; fall back to the most portable computation.
+     */
+    if (!len) {
+        len = offsetof(struct dirent, d_name) + strlen(dent->d_name) + 1;
+    }
+    return g_memdup(dent, len);
+}
+
+/*
+ * As long as mknodat is not available on macOS, this workaround
+ * using pthread_fchdir_np is needed. qemu_mknodat is defined in
+ * 9p-util-darwin.c. pthread_fchdir_np is weakly linked here as a guard
+ * in case it disappears in future macOS versions, because it is
+ * a private API.
+ */
+#if defined CONFIG_DARWIN && defined CONFIG_PTHREAD_FCHDIR_NP
+int pthread_fchdir_np(int fd) __attribute__((weak_import));
+#endif
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev);
+
#endif
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 15b3f4d385..a6d6b3f835 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -27,12 +27,17 @@
#include "virtio-9p.h"
#include "fsdev/qemu-fsdev.h"
#include "9p-xattr.h"
+#include "9p-util.h"
#include "coth.h"
#include "trace.h"
#include "migration/blocker.h"
#include "qemu/xxhash.h"
#include <math.h>
+#ifdef CONFIG_LINUX
#include <linux/limits.h>
+#else
+#include <limits.h>
+#endif
int open_fd_hw;
int total_open_fd;
@@ -133,11 +138,20 @@ static int dotl_to_open_flags(int flags)
{ P9_DOTL_NONBLOCK, O_NONBLOCK } ,
{ P9_DOTL_DSYNC, O_DSYNC },
{ P9_DOTL_FASYNC, FASYNC },
+#ifndef CONFIG_DARWIN
+ { P9_DOTL_NOATIME, O_NOATIME },
+ /*
+ * On Darwin, we could map to F_NOCACHE, which is
+ * similar, but doesn't quite have the same
+ * semantics. However, we don't support O_DIRECT
+ * even on linux at the moment, so we just ignore
+ * it here.
+ */
{ P9_DOTL_DIRECT, O_DIRECT },
+#endif
{ P9_DOTL_LARGEFILE, O_LARGEFILE },
{ P9_DOTL_DIRECTORY, O_DIRECTORY },
{ P9_DOTL_NOFOLLOW, O_NOFOLLOW },
- { P9_DOTL_NOATIME, O_NOATIME },
{ P9_DOTL_SYNC, O_SYNC },
};
@@ -166,10 +180,12 @@ static int get_dotl_openflags(V9fsState *s, int oflags)
*/
flags = dotl_to_open_flags(oflags);
flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
+#ifndef CONFIG_DARWIN
/*
* Ignore direct disk access hint until the server supports it.
*/
flags &= ~O_DIRECT;
+#endif
return flags;
}
@@ -612,8 +628,8 @@ static inline uint64_t mirror64bit(uint64_t value)
((uint64_t)mirror8bit((value >> 56) & 0xff));
}
-/**
- * @brief Parameter k for the Exponential Golomb algorihm to be used.
+/*
+ * Parameter k for the Exponential Golomb algorithm to be used.
*
* The smaller this value, the smaller the minimum bit count for the Exp.
* Golomb generated affixes will be (at lowest index) however for the
@@ -626,28 +642,30 @@ static inline uint64_t mirror64bit(uint64_t value)
* should be small, for a large amount of devices k might be increased
* instead. The default of k=0 should be fine for most users though.
*
- * @b IMPORTANT: In case this ever becomes a runtime parameter; the value of
+ * IMPORTANT: In case this ever becomes a runtime parameter; the value of
* k should not change as long as guest is still running! Because that would
* cause completely different inode numbers to be generated on guest.
*/
#define EXP_GOLOMB_K 0
/**
- * @brief Exponential Golomb algorithm for arbitrary k (including k=0).
+ * expGolombEncode() - Exponential Golomb algorithm for arbitrary k
+ * (including k=0).
+ *
+ * @n: natural number (or index) of the prefix to be generated
+ * (1, 2, 3, ...)
+ * @k: parameter k of Exp. Golomb algorithm to be used
+ * (see comment on EXP_GOLOMB_K macro for details about k)
+ * Return: prefix for given @n and @k
*
- * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!)
+ * The Exponential Golomb algorithm generates prefixes (NOT suffixes!)
* with growing length and with the mathematical property of being
* "prefix-free". The latter means the generated prefixes can be prepended
* in front of arbitrary numbers and the resulting concatenated numbers are
* guaranteed to be always unique.
*
* This is a minor adjustment to the original Exp. Golomb algorithm in the
- * sense that lowest allowed index (@param n) starts with 1, not with zero.
- *
- * @param n - natural number (or index) of the prefix to be generated
- * (1, 2, 3, ...)
- * @param k - parameter k of Exp. Golomb algorithm to be used
- * (see comment on EXP_GOLOMB_K macro for details about k)
+ * sense that lowest allowed index (@n) starts with 1, not with zero.
*/
static VariLenAffix expGolombEncode(uint64_t n, int k)
{
@@ -661,7 +679,9 @@ static VariLenAffix expGolombEncode(uint64_t n, int k)
}
/**
- * @brief Converts a suffix into a prefix, or a prefix into a suffix.
+ * invertAffix() - Converts a suffix into a prefix, or a prefix into a suffix.
+ * @affix: either suffix or prefix to be inverted
+ * Return: inversion of passed @affix
*
* Simply mirror all bits of the affix value, for the purpose to preserve
* respectively the mathematical "prefix-free" or "suffix-free" property
@@ -685,16 +705,16 @@ static VariLenAffix invertAffix(const VariLenAffix *affix)
}
/**
- * @brief Generates suffix numbers with "suffix-free" property.
+ * affixForIndex() - Generates suffix numbers with "suffix-free" property.
+ * @index: natural number (or index) of the suffix to be generated
+ * (1, 2, 3, ...)
+ * Return: Suffix suitable to assemble unique number.
*
* This is just a wrapper function on top of the Exp. Golomb algorithm.
*
* Since the Exp. Golomb algorithm generates prefixes, but we need suffixes,
* this function converts the Exp. Golomb prefixes into appropriate suffixes
* which are still suitable for generating unique numbers.
- *
- * @param n - natural number (or index) of the suffix to be generated
- * (1, 2, 3, ...)
*/
static VariLenAffix affixForIndex(uint64_t index)
{
@@ -794,8 +814,8 @@ static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
return val->prefix_bits;
}
-/**
- * @brief Slow / full mapping host inode nr -> guest inode nr.
+/*
+ * Slow / full mapping host inode nr -> guest inode nr.
*
* This function performs a slower and much more costly remapping of an
* original file inode number on host to an appropriate different inode
@@ -807,7 +827,7 @@ static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
* qid_path_suffixmap() failed. In practice this slow / full mapping is not
* expected ever to be used at all though.
*
- * @see qid_path_suffixmap() for details
+ * See qid_path_suffixmap() for details
*
*/
static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
@@ -848,8 +868,8 @@ static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
return 0;
}
-/**
- * @brief Quick mapping host inode nr -> guest inode nr.
+/*
+ * Quick mapping host inode nr -> guest inode nr.
*
* This function performs quick remapping of an original file inode number
* on host to an appropriate different inode number on guest. This remapping
@@ -1265,12 +1285,15 @@ static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
/**
- * Convert host filesystem's block size into an appropriate block size for
- * 9p client (guest OS side). The value returned suggests an "optimum" block
- * size for 9p I/O, i.e. to maximize performance.
+ * blksize_to_iounit() - Block size exposed to 9p client.
+ * Return: block size
*
* @pdu: 9p client request
* @blksize: host filesystem's block size
+ *
+ * Convert host filesystem's block size into an appropriate block size for
+ * 9p client (guest OS side). The value returned suggests an "optimum" block
+ * size for 9p I/O, i.e. to maximize performance.
*/
static int32_t blksize_to_iounit(const V9fsPDU *pdu, int32_t blksize)
{
@@ -1309,11 +1332,17 @@ static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf,
v9lstat->st_blksize = stat_to_iounit(pdu, stbuf);
v9lstat->st_blocks = stbuf->st_blocks;
v9lstat->st_atime_sec = stbuf->st_atime;
- v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
v9lstat->st_mtime_sec = stbuf->st_mtime;
- v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
v9lstat->st_ctime_sec = stbuf->st_ctime;
+#ifdef CONFIG_DARWIN
+ v9lstat->st_atime_nsec = stbuf->st_atimespec.tv_nsec;
+ v9lstat->st_mtime_nsec = stbuf->st_mtimespec.tv_nsec;
+ v9lstat->st_ctime_nsec = stbuf->st_ctimespec.tv_nsec;
+#else
+ v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
+ v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
+#endif
/* Currently we only support BASIC fields in stat */
v9lstat->st_result_mask = P9_STATS_BASIC;
@@ -2271,7 +2300,7 @@ static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
count += len;
v9fs_stat_free(&v9stat);
v9fs_path_free(&path);
- saved_dir_pos = dent->d_off;
+ saved_dir_pos = qemu_dirent_off(dent);
}
v9fs_readdir_unlock(&fidp->fs.dir);
@@ -2376,10 +2405,11 @@ out_nofid:
}
/**
- * Returns size required in Rreaddir response for the passed dirent @p name.
+ * v9fs_readdir_response_size() - Returns size required in Rreaddir response
+ * for the passed dirent @name.
*
- * @param name - directory entry's name (i.e. file name, directory name)
- * @returns required size in bytes
+ * @name: directory entry's name (i.e. file name, directory name)
+ * Return: required size in bytes
*/
size_t v9fs_readdir_response_size(V9fsString *name)
{
@@ -2410,6 +2440,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
V9fsString name;
int len, err = 0;
int32_t count = 0;
+ off_t off;
struct dirent *dent;
struct stat *st;
struct V9fsDirEnt *entries = NULL;
@@ -2470,12 +2501,13 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
qid.version = 0;
}
+ off = qemu_dirent_off(dent);
v9fs_string_init(&name);
v9fs_string_sprintf(&name, "%s", dent->d_name);
/* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
len = pdu_marshal(pdu, 11 + count, "Qqbs",
- &qid, dent->d_off,
+ &qid, off,
dent->d_type, &name);
v9fs_string_free(&name);
@@ -3515,9 +3547,15 @@ static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
f_bavail = stbuf->f_bavail / bsize_factor;
f_files = stbuf->f_files;
f_ffree = stbuf->f_ffree;
+#ifdef CONFIG_DARWIN
+ fsid_val = (unsigned int)stbuf->f_fsid.val[0] |
+ (unsigned long long)stbuf->f_fsid.val[1] << 32;
+ f_namelen = NAME_MAX;
+#else
fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
(unsigned long long)stbuf->f_fsid.__val[1] << 32;
f_namelen = stbuf->f_namelen;
+#endif
return pdu_marshal(pdu, offset, "ddqqqqqqd",
f_type, f_bsize, f_blocks, f_bfree,
@@ -3919,7 +3957,7 @@ static void coroutine_fn v9fs_xattrcreate(void *opaque)
rflags |= XATTR_REPLACE;
}
- if (size > XATTR_SIZE_MAX) {
+ if (size > P9_XATTR_SIZE_MAX) {
err = -E2BIG;
goto out_nofid;
}
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 1567b67841..af2635fae9 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -100,8 +100,8 @@ typedef enum P9ProtoVersion {
V9FS_PROTO_2000L = 0x02,
} P9ProtoVersion;
-/**
- * @brief Minimum message size supported by this 9pfs server.
+/*
+ * Minimum message size supported by this 9pfs server.
*
* A client establishes a session by sending a Tversion request along with a
* 'msize' parameter which suggests the server a maximum message size ever to be
@@ -231,7 +231,7 @@ static inline void v9fs_readdir_init(P9ProtoVersion proto_version, V9fsDir *dir)
}
}
-/**
+/*
* Type for 9p fs drivers' (a.k.a. 9p backends) result of readdir requests,
* which is a chained list of directory entries.
*/
@@ -289,8 +289,8 @@ typedef enum AffixType_t {
AffixType_Suffix, /* A.k.a. postfix. */
} AffixType_t;
-/**
- * @brief Unique affix of variable length.
+/*
+ * Unique affix of variable length.
*
* An affix is (currently) either a suffix or a prefix, which is either
* going to be prepended (prefix) or appended (suffix) with some other
@@ -304,7 +304,7 @@ typedef struct VariLenAffix {
AffixType_t type; /* Whether this affix is a suffix or a prefix. */
uint64_t value; /* Actual numerical value of this affix. */
/*
- * Lenght of the affix, that is how many (of the lowest) bits of @c value
+ * Length of the affix, that is how many (of the lowest) bits of ``value``
* must be used for appending/prepending this affix to its final resulting,
* unique number.
*/
@@ -479,4 +479,22 @@ struct V9fsTransport {
void (*push_and_notify)(V9fsPDU *pdu);
};
+#if defined(XATTR_SIZE_MAX)
+/* Linux */
+#define P9_XATTR_SIZE_MAX XATTR_SIZE_MAX
+#elif defined(CONFIG_DARWIN)
+/*
+ * Darwin doesn't seem to define a maximum xattr size in its user
+ * space header, so manually configure it across platforms as 64k.
+ *
+ * Having no limit at all can lead to QEMU crashing during large g_malloc()
+ * calls. Because QEMU does not currently support macOS guests, the below
+ * preliminary solution only works due to its being a reflection of the limit of
+ * Linux guests.
+ */
+#define P9_XATTR_SIZE_MAX 65536
+#else
+#error Missing definition for P9_XATTR_SIZE_MAX for this host system
+#endif
+
#endif
diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
index c0873bde16..75148bc985 100644
--- a/hw/9pfs/codir.c
+++ b/hw/9pfs/codir.c
@@ -22,6 +22,8 @@
#include "qemu/coroutine.h"
#include "qemu/main-loop.h"
#include "coth.h"
+#include "9p-xattr.h"
+#include "9p-util.h"
/*
* Intended to be called from bottom-half (e.g. background I/O thread)
@@ -166,7 +168,7 @@ static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
}
size += len;
- saved_dir_pos = dent->d_off;
+ saved_dir_pos = qemu_dirent_off(dent);
}
/* restore (last) saved position */
@@ -182,14 +184,25 @@ out:
}
/**
- * @brief Reads multiple directory entries in one rush.
+ * v9fs_co_readdir_many() - Reads multiple directory entries in one rush.
+ *
+ * @pdu: the causing 9p (T_readdir) client request
+ * @fidp: already opened directory where readdir shall be performed on
+ * @entries: output for directory entries (must not be NULL)
+ * @offset: initial position inside the directory the function shall
+ * seek to before retrieving the directory entries
+ * @maxsize: maximum result message body size (in bytes)
+ * @dostat: whether a stat() should be performed and returned for
+ * each directory entry
+ * Return: resulting response message body size (in bytes) on success,
+ * negative error code otherwise
*
* Retrieves the requested (max. amount of) directory entries from the fs
* driver. This function must only be called by the main IO thread (top half).
* Internally this function call will be dispatched to a background IO thread
* (bottom half) where it is eventually executed by the fs driver.
*
- * @discussion Acquiring multiple directory entries in one rush from the fs
+ * Acquiring multiple directory entries in one rush from the fs
* driver, instead of retrieving each directory entry individually, is very
* beneficial from performance point of view. Because for every fs driver
* request latency is added, which in practice could lead to overall
@@ -197,20 +210,9 @@ out:
* directory) if every directory entry was individually requested from fs
* driver.
*
- * @note You must @b ALWAYS call @c v9fs_free_dirents(entries) after calling
+ * NOTE: You must ALWAYS call v9fs_free_dirents(entries) after calling
* v9fs_co_readdir_many(), both on success and on error cases of this
- * function, to avoid memory leaks once @p entries are no longer needed.
- *
- * @param pdu - the causing 9p (T_readdir) client request
- * @param fidp - already opened directory where readdir shall be performed on
- * @param entries - output for directory entries (must not be NULL)
- * @param offset - initial position inside the directory the function shall
- * seek to before retrieving the directory entries
- * @param maxsize - maximum result message body size (in bytes)
- * @param dostat - whether a stat() should be performed and returned for
- * each directory entry
- * @returns resulting response message body size (in bytes) on success,
- * negative error code otherwise
+ * function, to avoid memory leaks once @entries are no longer needed.
*/
int coroutine_fn v9fs_co_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
struct V9fsDirEnt **entries,
diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h
index f83c7dda7b..1a1edbdc2a 100644
--- a/hw/9pfs/coth.h
+++ b/hw/9pfs/coth.h
@@ -19,7 +19,7 @@
#include "qemu/coroutine.h"
#include "9p.h"
-/**
+/*
* we want to use bottom half because we want to make sure the below
* sequence of events.
*
@@ -29,7 +29,7 @@
* we cannot swap step 1 and 2, because that would imply worker thread
* can enter coroutine while step1 is still running
*
- * @b PERFORMANCE @b CONSIDERATIONS: As a rule of thumb, keep in mind
+ * PERFORMANCE CONSIDERATIONS: As a rule of thumb, keep in mind
* that hopping between threads adds @b latency! So when handling a
* 9pfs request, avoid calling v9fs_co_run_in_worker() too often, because
* this might otherwise sum up to a significant, huge overall latency for
diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build
index 99be5d9119..12443b6ad5 100644
--- a/hw/9pfs/meson.build
+++ b/hw/9pfs/meson.build
@@ -4,7 +4,6 @@ fs_ss.add(files(
'9p-posix-acl.c',
'9p-proxy.c',
'9p-synth.c',
- '9p-util.c',
'9p-xattr-user.c',
'9p-xattr.c',
'9p.c',
@@ -14,6 +13,8 @@ fs_ss.add(files(
'coth.c',
'coxattr.c',
))
+fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c'))
+fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c'))
fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c'))
softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 46bf7ceddf..46a42502bc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2102,6 +2102,10 @@ static void machvirt_init(MachineState *machine)
object_property_set_bool(cpuobj, "pmu", false, NULL);
}
+ if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) {
+ object_property_set_bool(cpuobj, "lpa2", false, NULL);
+ }
+
if (object_property_find(cpuobj, "reset-cbar")) {
object_property_set_int(cpuobj, "reset-cbar",
vms->memmap[VIRT_CPUPERIPHS].base,
@@ -3020,8 +3024,11 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 0)
static void virt_machine_6_2_options(MachineClass *mc)
{
+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+
virt_machine_7_0_options(mc);
compat_props_add(mc->compat_props, hw_compat_6_2, hw_compat_6_2_len);
+ vmc->no_tcg_lpa2 = true;
}
DEFINE_VIRT_MACHINE(6, 2)
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 860787580a..2785b9e849 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -21,6 +21,7 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
+#include "qemu/memalign.h"
#include "qapi/error.h"
#include "hw/xen/xen_common.h"
#include "hw/block/xen_blkif.h"
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 21d18ac2e3..347875a0cd 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -32,6 +32,7 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
+#include "qemu/memalign.h"
#include "hw/irq.h"
#include "hw/isa/isa.h"
#include "hw/qdev-properties.h"
diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 81f9f971d8..74c7190302 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -1023,7 +1023,7 @@ static void postload_update_cb(void *opaque, bool running, RunState state)
{
PFlashCFI01 *pfl = opaque;
- /* This is called after bdrv_invalidate_cache_all. */
+ /* This is called after bdrv_activate_all. */
qemu_del_vm_change_state_handler(pfl->vmstate);
pfl->vmstate = NULL;
diff --git a/hw/display/edid-generate.c b/hw/display/edid-generate.c
index bccf32af69..2cb819675e 100644
--- a/hw/display/edid-generate.c
+++ b/hw/display/edid-generate.c
@@ -255,33 +255,31 @@ static void edid_desc_dummy(uint8_t *desc)
edid_desc_type(desc, 0x10);
}
-static void edid_desc_timing(uint8_t *desc, uint32_t refresh_rate,
+static void edid_desc_timing(uint8_t *desc, const Timings *timings,
uint32_t xres, uint32_t yres,
uint32_t xmm, uint32_t ymm)
{
- Timings timings;
- generate_timings(&timings, refresh_rate, xres, yres);
- stl_le_p(desc, timings.clock);
+ stw_le_p(desc, timings->clock);
desc[2] = xres & 0xff;
- desc[3] = timings.xblank & 0xff;
+ desc[3] = timings->xblank & 0xff;
desc[4] = (((xres & 0xf00) >> 4) |
- ((timings.xblank & 0xf00) >> 8));
+ ((timings->xblank & 0xf00) >> 8));
desc[5] = yres & 0xff;
- desc[6] = timings.yblank & 0xff;
+ desc[6] = timings->yblank & 0xff;
desc[7] = (((yres & 0xf00) >> 4) |
- ((timings.yblank & 0xf00) >> 8));
+ ((timings->yblank & 0xf00) >> 8));
- desc[8] = timings.xfront & 0xff;
- desc[9] = timings.xsync & 0xff;
+ desc[8] = timings->xfront & 0xff;
+ desc[9] = timings->xsync & 0xff;
- desc[10] = (((timings.yfront & 0x00f) << 4) |
- ((timings.ysync & 0x00f) << 0));
- desc[11] = (((timings.xfront & 0x300) >> 2) |
- ((timings.xsync & 0x300) >> 4) |
- ((timings.yfront & 0x030) >> 2) |
- ((timings.ysync & 0x030) >> 4));
+ desc[10] = (((timings->yfront & 0x00f) << 4) |
+ ((timings->ysync & 0x00f) << 0));
+ desc[11] = (((timings->xfront & 0x300) >> 2) |
+ ((timings->xsync & 0x300) >> 4) |
+ ((timings->yfront & 0x030) >> 2) |
+ ((timings->ysync & 0x030) >> 4));
desc[12] = xmm & 0xff;
desc[13] = ymm & 0xff;
@@ -348,13 +346,10 @@ static void init_displayid(uint8_t *did)
edid_checksum(did + 1, did[2] + 4);
}
-static void qemu_displayid_generate(uint8_t *did, uint32_t refresh_rate,
+static void qemu_displayid_generate(uint8_t *did, const Timings *timings,
uint32_t xres, uint32_t yres,
uint32_t xmm, uint32_t ymm)
{
- Timings timings;
- generate_timings(&timings, refresh_rate, xres, yres);
-
did[0] = 0x70; /* display id extension */
did[1] = 0x13; /* version 1.3 */
did[2] = 23; /* length */
@@ -364,21 +359,21 @@ static void qemu_displayid_generate(uint8_t *did, uint32_t refresh_rate,
did[6] = 0x00; /* revision */
did[7] = 0x14; /* block length */
- did[8] = timings.clock & 0xff;
- did[9] = (timings.clock & 0xff00) >> 8;
- did[10] = (timings.clock & 0xff0000) >> 16;
+ did[8] = timings->clock & 0xff;
+ did[9] = (timings->clock & 0xff00) >> 8;
+ did[10] = (timings->clock & 0xff0000) >> 16;
did[11] = 0x88; /* leave aspect ratio undefined */
stw_le_p(did + 12, 0xffff & (xres - 1));
- stw_le_p(did + 14, 0xffff & (timings.xblank - 1));
- stw_le_p(did + 16, 0xffff & (timings.xfront - 1));
- stw_le_p(did + 18, 0xffff & (timings.xsync - 1));
+ stw_le_p(did + 14, 0xffff & (timings->xblank - 1));
+ stw_le_p(did + 16, 0xffff & (timings->xfront - 1));
+ stw_le_p(did + 18, 0xffff & (timings->xsync - 1));
stw_le_p(did + 20, 0xffff & (yres - 1));
- stw_le_p(did + 22, 0xffff & (timings.yblank - 1));
- stw_le_p(did + 24, 0xffff & (timings.yfront - 1));
- stw_le_p(did + 26, 0xffff & (timings.ysync - 1));
+ stw_le_p(did + 22, 0xffff & (timings->yblank - 1));
+ stw_le_p(did + 24, 0xffff & (timings->yfront - 1));
+ stw_le_p(did + 26, 0xffff & (timings->ysync - 1));
edid_checksum(did + 1, did[2] + 4);
}
@@ -386,6 +381,7 @@ static void qemu_displayid_generate(uint8_t *did, uint32_t refresh_rate,
void qemu_edid_generate(uint8_t *edid, size_t size,
qemu_edid_info *info)
{
+ Timings timings;
uint8_t *desc = edid + 54;
uint8_t *xtra3 = NULL;
uint8_t *dta = NULL;
@@ -409,9 +405,6 @@ void qemu_edid_generate(uint8_t *edid, size_t size,
if (!info->prefy) {
info->prefy = 800;
}
- if (info->prefx >= 4096 || info->prefy >= 4096) {
- large_screen = 1;
- }
if (info->width_mm && info->height_mm) {
width_mm = info->width_mm;
height_mm = info->height_mm;
@@ -421,6 +414,11 @@ void qemu_edid_generate(uint8_t *edid, size_t size,
height_mm = qemu_edid_dpi_to_mm(dpi, info->prefy);
}
+ generate_timings(&timings, refresh_rate, info->prefx, info->prefy);
+ if (info->prefx >= 4096 || info->prefy >= 4096 || timings.clock >= 65536) {
+ large_screen = 1;
+ }
+
/* =============== extensions =============== */
if (size >= 256) {
@@ -501,7 +499,7 @@ void qemu_edid_generate(uint8_t *edid, size_t size,
if (!large_screen) {
/* The DTD section has only 12 bits to store the resolution */
- edid_desc_timing(desc, refresh_rate, info->prefx, info->prefy,
+ edid_desc_timing(desc, &timings, info->prefx, info->prefy,
width_mm, height_mm);
desc = edid_desc_next(edid, dta, desc);
}
@@ -536,7 +534,7 @@ void qemu_edid_generate(uint8_t *edid, size_t size,
/* =============== display id extensions =============== */
if (did && large_screen) {
- qemu_displayid_generate(did, refresh_rate, info->prefx, info->prefy,
+ qemu_displayid_generate(did, &timings, info->prefx, info->prefy,
width_mm, height_mm);
}
diff --git a/hw/display/trace-events b/hw/display/trace-events
index 4a687d1b8e..91efc88f04 100644
--- a/hw/display/trace-events
+++ b/hw/display/trace-events
@@ -21,6 +21,9 @@ vmware_palette_write(uint32_t index, uint32_t value) "index %d, value 0x%x"
vmware_scratch_read(uint32_t index, uint32_t value) "index %d, value 0x%x"
vmware_scratch_write(uint32_t index, uint32_t value) "index %d, value 0x%x"
vmware_setmode(uint32_t w, uint32_t h, uint32_t bpp) "%dx%d @ %d bpp"
+vmware_verify_rect_less_than_zero(const char *name, const char *param, int x) "%s: %s was < 0 (%d)"
+vmware_verify_rect_greater_than_bound(const char *name, const char *param, int bound, int x) "%s: %s was > %d (%d)"
+vmware_verify_rect_surface_bound_exceeded(const char *name, const char *component, int bound, const char *param1, int value1, const char *param2, int value2) "%s: %s > %d (%s: %d, %s: %d)"
# virtio-gpu-base.c
virtio_gpu_features(bool virgl) "virgl %d"
diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c
index e2969a6c81..0cc43a1f15 100644
--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
@@ -297,46 +297,52 @@ static inline bool vmsvga_verify_rect(DisplaySurface *surface,
int x, int y, int w, int h)
{
if (x < 0) {
- fprintf(stderr, "%s: x was < 0 (%d)\n", name, x);
+ trace_vmware_verify_rect_less_than_zero(name, "x", x);
return false;
}
if (x > SVGA_MAX_WIDTH) {
- fprintf(stderr, "%s: x was > %d (%d)\n", name, SVGA_MAX_WIDTH, x);
+ trace_vmware_verify_rect_greater_than_bound(name, "x", SVGA_MAX_WIDTH,
+ x);
return false;
}
if (w < 0) {
- fprintf(stderr, "%s: w was < 0 (%d)\n", name, w);
+ trace_vmware_verify_rect_less_than_zero(name, "w", w);
return false;
}
if (w > SVGA_MAX_WIDTH) {
- fprintf(stderr, "%s: w was > %d (%d)\n", name, SVGA_MAX_WIDTH, w);
+ trace_vmware_verify_rect_greater_than_bound(name, "w", SVGA_MAX_WIDTH,
+ w);
return false;
}
if (x + w > surface_width(surface)) {
- fprintf(stderr, "%s: width was > %d (x: %d, w: %d)\n",
- name, surface_width(surface), x, w);
+ trace_vmware_verify_rect_surface_bound_exceeded(name, "width",
+ surface_width(surface),
+ "x", x, "w", w);
return false;
}
if (y < 0) {
- fprintf(stderr, "%s: y was < 0 (%d)\n", name, y);
+ trace_vmware_verify_rect_less_than_zero(name, "y", y);
return false;
}
if (y > SVGA_MAX_HEIGHT) {
- fprintf(stderr, "%s: y was > %d (%d)\n", name, SVGA_MAX_HEIGHT, y);
+ trace_vmware_verify_rect_greater_than_bound(name, "y", SVGA_MAX_HEIGHT,
+ y);
return false;
}
if (h < 0) {
- fprintf(stderr, "%s: h was < 0 (%d)\n", name, h);
+ trace_vmware_verify_rect_less_than_zero(name, "h", h);
return false;
}
if (h > SVGA_MAX_HEIGHT) {
- fprintf(stderr, "%s: h was > %d (%d)\n", name, SVGA_MAX_HEIGHT, h);
+ trace_vmware_verify_rect_greater_than_bound(name, "h", SVGA_MAX_HEIGHT,
+ h);
return false;
}
if (y + h > surface_height(surface)) {
- fprintf(stderr, "%s: update height > %d (y: %d, h: %d)\n",
- name, surface_height(surface), y, h);
+ trace_vmware_verify_rect_surface_bound_exceeded(name, "height",
+ surface_height(surface),
+ "y", y, "h", h);
return false;
}
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 33463d9b8f..3a5afff5d7 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -30,6 +30,7 @@
#include "qemu/main-loop.h"
#include "qemu/timer.h"
#include "qemu/hw-version.h"
+#include "qemu/memalign.h"
#include "sysemu/sysemu.h"
#include "sysemu/blockdev.h"
#include "sysemu/dma.h"
@@ -434,12 +435,16 @@ static const AIOCBInfo trim_aiocb_info = {
static void ide_trim_bh_cb(void *opaque)
{
TrimAIOCB *iocb = opaque;
+ BlockBackend *blk = iocb->s->blk;
iocb->common.cb(iocb->common.opaque, iocb->ret);
qemu_bh_delete(iocb->bh);
iocb->bh = NULL;
qemu_aio_unref(iocb);
+
+ /* Paired with an increment in ide_issue_trim() */
+ blk_dec_in_flight(blk);
}
static void ide_issue_trim_cb(void *opaque, int ret)
@@ -509,6 +514,9 @@ BlockAIOCB *ide_issue_trim(
IDEState *s = opaque;
TrimAIOCB *iocb;
+ /* Paired with a decrement in ide_trim_bh_cb() */
+ blk_inc_in_flight(s->blk);
+
iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
iocb->s = s;
iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig
index 528e77b4a6..ec8d4cec29 100644
--- a/hw/intc/Kconfig
+++ b/hw/intc/Kconfig
@@ -73,6 +73,9 @@ config RISCV_ACLINT
config RISCV_APLIC
bool
+config RISCV_IMSIC
+ bool
+
config SIFIVE_PLIC
bool
diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c
index 6d3c8ee231..0b8f79a122 100644
--- a/hw/intc/arm_gicv3.c
+++ b/hw/intc/arm_gicv3.c
@@ -369,11 +369,19 @@ static const MemoryRegionOps gic_ops[] = {
.read_with_attrs = gicv3_dist_read,
.write_with_attrs = gicv3_dist_write,
.endianness = DEVICE_NATIVE_ENDIAN,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 1,
+ .impl.max_access_size = 8,
},
{
.read_with_attrs = gicv3_redist_read,
.write_with_attrs = gicv3_redist_write,
.endianness = DEVICE_NATIVE_ENDIAN,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 1,
+ .impl.max_access_size = 8,
}
};
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index d7e03d0cab..1a3d440a54 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -612,7 +612,8 @@ static uint64_t icv_hppir_read(CPUARMState *env, const ARMCPRegInfo *ri)
}
}
- trace_gicv3_icv_hppir_read(grp, gicv3_redist_affid(cs), value);
+ trace_gicv3_icv_hppir_read(ri->crm == 8 ? 0 : 1,
+ gicv3_redist_affid(cs), value);
return value;
}
diff --git a/hw/intc/arm_gicv3_dist.c b/hw/intc/arm_gicv3_dist.c
index 4164500ea9..28d913b211 100644
--- a/hw/intc/arm_gicv3_dist.c
+++ b/hw/intc/arm_gicv3_dist.c
@@ -838,7 +838,7 @@ MemTxResult gicv3_dist_read(void *opaque, hwaddr offset, uint64_t *data,
if (!r) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: invalid guest read at offset " TARGET_FMT_plx
- "size %u\n", __func__, offset, size);
+ " size %u\n", __func__, offset, size);
trace_gicv3_dist_badread(offset, size, attrs.secure);
/* The spec requires that reserved registers are RAZ/WI;
* so use MEMTX_ERROR returns from leaf functions as a way to
@@ -879,7 +879,7 @@ MemTxResult gicv3_dist_write(void *opaque, hwaddr offset, uint64_t data,
if (!r) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: invalid guest write at offset " TARGET_FMT_plx
- "size %u\n", __func__, offset, size);
+ " size %u\n", __func__, offset, size);
trace_gicv3_dist_badwrite(offset, data, size, attrs.secure);
/* The spec requires that reserved registers are RAZ/WI;
* so use MEMTX_ERROR returns from leaf functions as a way to
diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
index 4f598d3c14..b96b874afd 100644
--- a/hw/intc/arm_gicv3_its.c
+++ b/hw/intc/arm_gicv3_its.c
@@ -161,16 +161,22 @@ static MemTxResult get_cte(GICv3ITSState *s, uint16_t icid, CTEntry *cte)
if (entry_addr == -1) {
/* No L2 table entry, i.e. no valid CTE, or a memory error */
cte->valid = false;
- return res;
+ goto out;
}
cteval = address_space_ldq_le(as, entry_addr, MEMTXATTRS_UNSPECIFIED, &res);
if (res != MEMTX_OK) {
- return res;
+ goto out;
}
cte->valid = FIELD_EX64(cteval, CTE, VALID);
cte->rdbase = FIELD_EX64(cteval, CTE, RDBASE);
- return MEMTX_OK;
+out:
+ if (res != MEMTX_OK) {
+ trace_gicv3_its_cte_read_fault(icid);
+ } else {
+ trace_gicv3_its_cte_read(icid, cte->valid, cte->rdbase);
+ }
+ return res;
}
/*
@@ -187,6 +193,10 @@ static bool update_ite(GICv3ITSState *s, uint32_t eventid, const DTEntry *dte,
uint64_t itel = 0;
uint32_t iteh = 0;
+ trace_gicv3_its_ite_write(dte->ittaddr, eventid, ite->valid,
+ ite->inttype, ite->intid, ite->icid,
+ ite->vpeid, ite->doorbell);
+
if (ite->valid) {
itel = FIELD_DP64(itel, ITE_L, VALID, 1);
itel = FIELD_DP64(itel, ITE_L, INTTYPE, ite->inttype);
@@ -221,11 +231,13 @@ static MemTxResult get_ite(GICv3ITSState *s, uint32_t eventid,
itel = address_space_ldq_le(as, iteaddr, MEMTXATTRS_UNSPECIFIED, &res);
if (res != MEMTX_OK) {
+ trace_gicv3_its_ite_read_fault(dte->ittaddr, eventid);
return res;
}
iteh = address_space_ldl_le(as, iteaddr + 8, MEMTXATTRS_UNSPECIFIED, &res);
if (res != MEMTX_OK) {
+ trace_gicv3_its_ite_read_fault(dte->ittaddr, eventid);
return res;
}
@@ -235,6 +247,9 @@ static MemTxResult get_ite(GICv3ITSState *s, uint32_t eventid,
ite->icid = FIELD_EX64(itel, ITE_L, ICID);
ite->vpeid = FIELD_EX64(itel, ITE_L, VPEID);
ite->doorbell = FIELD_EX64(iteh, ITE_H, DOORBELL);
+ trace_gicv3_its_ite_read(dte->ittaddr, eventid, ite->valid,
+ ite->inttype, ite->intid, ite->icid,
+ ite->vpeid, ite->doorbell);
return MEMTX_OK;
}
@@ -254,17 +269,23 @@ static MemTxResult get_dte(GICv3ITSState *s, uint32_t devid, DTEntry *dte)
if (entry_addr == -1) {
/* No L2 table entry, i.e. no valid DTE, or a memory error */
dte->valid = false;
- return res;
+ goto out;
}
dteval = address_space_ldq_le(as, entry_addr, MEMTXATTRS_UNSPECIFIED, &res);
if (res != MEMTX_OK) {
- return res;
+ goto out;
}
dte->valid = FIELD_EX64(dteval, DTE, VALID);
dte->size = FIELD_EX64(dteval, DTE, SIZE);
/* DTE word field stores bits [51:8] of the ITT address */
dte->ittaddr = FIELD_EX64(dteval, DTE, ITTADDR) << ITTADDR_SHIFT;
- return MEMTX_OK;
+out:
+ if (res != MEMTX_OK) {
+ trace_gicv3_its_dte_read_fault(devid);
+ } else {
+ trace_gicv3_its_dte_read(devid, dte->valid, dte->size, dte->ittaddr);
+ }
+ return res;
}
/*
@@ -366,6 +387,19 @@ static ItsCmdResult process_its_cmd(GICv3ITSState *s, const uint64_t *cmdpkt,
devid = (cmdpkt[0] & DEVID_MASK) >> DEVID_SHIFT;
eventid = cmdpkt[1] & EVENTID_MASK;
+ switch (cmd) {
+ case INTERRUPT:
+ trace_gicv3_its_cmd_int(devid, eventid);
+ break;
+ case CLEAR:
+ trace_gicv3_its_cmd_clear(devid, eventid);
+ break;
+ case DISCARD:
+ trace_gicv3_its_cmd_discard(devid, eventid);
+ break;
+ default:
+ g_assert_not_reached();
+ }
return do_process_its_cmd(s, devid, eventid, cmd);
}
@@ -382,15 +416,16 @@ static ItsCmdResult process_mapti(GICv3ITSState *s, const uint64_t *cmdpkt,
devid = (cmdpkt[0] & DEVID_MASK) >> DEVID_SHIFT;
eventid = cmdpkt[1] & EVENTID_MASK;
+ icid = cmdpkt[2] & ICID_MASK;
if (ignore_pInt) {
pIntid = eventid;
+ trace_gicv3_its_cmd_mapi(devid, eventid, icid);
} else {
pIntid = (cmdpkt[1] & pINTID_MASK) >> pINTID_SHIFT;
+ trace_gicv3_its_cmd_mapti(devid, eventid, icid, pIntid);
}
- icid = cmdpkt[2] & ICID_MASK;
-
if (devid >= s->dt.num_entries) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: invalid command attributes: devid %d>=%d",
@@ -451,6 +486,8 @@ static bool update_cte(GICv3ITSState *s, uint16_t icid, const CTEntry *cte)
uint64_t cteval = 0;
MemTxResult res = MEMTX_OK;
+ trace_gicv3_its_cte_write(icid, cte->valid, cte->rdbase);
+
if (cte->valid) {
/* add mapping entry to collection table */
cteval = FIELD_DP64(cteval, CTE, VALID, 1);
@@ -484,6 +521,7 @@ static ItsCmdResult process_mapc(GICv3ITSState *s, const uint64_t *cmdpkt)
} else {
cte.rdbase = 0;
}
+ trace_gicv3_its_cmd_mapc(icid, cte.rdbase, cte.valid);
if (icid >= s->ct.num_entries) {
qemu_log_mask(LOG_GUEST_ERROR, "ITS MAPC: invalid ICID 0x%d", icid);
@@ -509,6 +547,8 @@ static bool update_dte(GICv3ITSState *s, uint32_t devid, const DTEntry *dte)
uint64_t dteval = 0;
MemTxResult res = MEMTX_OK;
+ trace_gicv3_its_dte_write(devid, dte->valid, dte->size, dte->ittaddr);
+
if (dte->valid) {
/* add mapping entry to device table */
dteval = FIELD_DP64(dteval, DTE, VALID, 1);
@@ -539,6 +579,8 @@ static ItsCmdResult process_mapd(GICv3ITSState *s, const uint64_t *cmdpkt)
dte.ittaddr = (cmdpkt[2] & ITTADDR_MASK) >> ITTADDR_SHIFT;
dte.valid = cmdpkt[2] & CMD_FIELD_VALID_MASK;
+ trace_gicv3_its_cmd_mapd(devid, dte.size, dte.ittaddr, dte.valid);
+
if (devid >= s->dt.num_entries) {
qemu_log_mask(LOG_GUEST_ERROR,
"ITS MAPD: invalid device ID field 0x%x >= 0x%x\n",
@@ -562,6 +604,8 @@ static ItsCmdResult process_movall(GICv3ITSState *s, const uint64_t *cmdpkt)
rd1 = FIELD_EX64(cmdpkt[2], MOVALL_2, RDBASE1);
rd2 = FIELD_EX64(cmdpkt[3], MOVALL_3, RDBASE2);
+ trace_gicv3_its_cmd_movall(rd1, rd2);
+
if (rd1 >= s->gicv3->num_cpu) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: RDBASE1 %" PRId64
@@ -601,6 +645,8 @@ static ItsCmdResult process_movi(GICv3ITSState *s, const uint64_t *cmdpkt)
eventid = FIELD_EX64(cmdpkt[1], MOVI_1, EVENTID);
new_icid = FIELD_EX64(cmdpkt[2], MOVI_2, ICID);
+ trace_gicv3_its_cmd_movi(devid, eventid, new_icid);
+
if (devid >= s->dt.num_entries) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: invalid command attributes: devid %d>=%d",
@@ -779,6 +825,7 @@ static void process_cmdq(GICv3ITSState *s)
* is already consistent by the time SYNC command is executed.
* Hence no further processing is required for SYNC command.
*/
+ trace_gicv3_its_cmd_sync();
break;
case GITS_CMD_MAPD:
result = process_mapd(s, cmdpkt);
@@ -803,6 +850,7 @@ static void process_cmdq(GICv3ITSState *s)
* need to trigger lpi priority re-calculation to be in
* sync with LPI config table or pending table changes.
*/
+ trace_gicv3_its_cmd_inv();
for (i = 0; i < s->gicv3->num_cpu; i++) {
gicv3_redist_update_lpi(&s->gicv3->cpu[i]);
}
@@ -814,6 +862,7 @@ static void process_cmdq(GICv3ITSState *s)
result = process_movall(s, cmdpkt);
break;
default:
+ trace_gicv3_its_cmd_unknown(cmd);
break;
}
if (result == CMD_CONTINUE) {
@@ -1264,7 +1313,7 @@ static MemTxResult gicv3_its_read(void *opaque, hwaddr offset, uint64_t *data,
if (!result) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: invalid guest read at offset " TARGET_FMT_plx
- "size %u\n", __func__, offset, size);
+ " size %u\n", __func__, offset, size);
trace_gicv3_its_badread(offset, size);
/*
* The spec requires that reserved registers are RAZ/WI;
@@ -1300,7 +1349,7 @@ static MemTxResult gicv3_its_write(void *opaque, hwaddr offset, uint64_t data,
if (!result) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: invalid guest write at offset " TARGET_FMT_plx
- "size %u\n", __func__, offset, size);
+ " size %u\n", __func__, offset, size);
trace_gicv3_its_badwrite(offset, data, size);
/*
* The spec requires that reserved registers are RAZ/WI;
diff --git a/hw/intc/meson.build b/hw/intc/meson.build
index d953197413..81ccdb0d78 100644
--- a/hw/intc/meson.build
+++ b/hw/intc/meson.build
@@ -51,6 +51,7 @@ specific_ss.add(when: 'CONFIG_S390_FLIC_KVM', if_true: files('s390_flic_kvm.c'))
specific_ss.add(when: 'CONFIG_SH_INTC', if_true: files('sh_intc.c'))
specific_ss.add(when: 'CONFIG_RISCV_ACLINT', if_true: files('riscv_aclint.c'))
specific_ss.add(when: 'CONFIG_RISCV_APLIC', if_true: files('riscv_aplic.c'))
+specific_ss.add(when: 'CONFIG_RISCV_IMSIC', if_true: files('riscv_imsic.c'))
specific_ss.add(when: 'CONFIG_SIFIVE_PLIC', if_true: files('sifive_plic.c'))
specific_ss.add(when: 'CONFIG_XICS', if_true: files('xics.c', 'xive2.c'))
specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_XICS'],
diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c
new file mode 100644
index 0000000000..8615e4cc1d
--- /dev/null
+++ b/hw/intc/riscv_imsic.c
@@ -0,0 +1,448 @@
+/*
+ * RISC-V IMSIC (Incoming Message Signaled Interrupt Controller)
+ *
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "qemu/bswap.h"
+#include "exec/address-spaces.h"
+#include "hw/sysbus.h"
+#include "hw/pci/msi.h"
+#include "hw/boards.h"
+#include "hw/qdev-properties.h"
+#include "hw/intc/riscv_imsic.h"
+#include "hw/irq.h"
+#include "target/riscv/cpu.h"
+#include "target/riscv/cpu_bits.h"
+#include "sysemu/sysemu.h"
+#include "migration/vmstate.h"
+
+/*
+ * Byte offsets inside one IMSIC MMIO page. riscv_imsic_write() only acts
+ * on writes whose in-page offset equals IMSIC_MMIO_PAGE_LE.
+ */
+#define IMSIC_MMIO_PAGE_LE 0x00
+#define IMSIC_MMIO_PAGE_BE 0x04
+
+/*
+ * Bounds asserted on the num_ids argument of riscv_imsic_create().
+ * IMSIC_MAX_ID is also the writable-bit mask of the eithreshold register.
+ */
+#define IMSIC_MIN_ID ((IMSIC_EIPx_BITS * 2) - 1)
+#define IMSIC_MAX_ID (IMSIC_TOPEI_IID_MASK)
+
+/* Per-interrupt flag bits kept in each eistate[] entry */
+#define IMSIC_EISTATE_PENDING (1U << 0)
+#define IMSIC_EISTATE_ENABLED (1U << 1)
+#define IMSIC_EISTATE_ENPEND (IMSIC_EISTATE_ENABLED | \
+ IMSIC_EISTATE_PENDING)
+
+/*
+ * Find the lowest-numbered interrupt identity of @page that is both
+ * enabled and pending.
+ *
+ * Identities 1 .. limit-1 are scanned, where limit is eithreshold[page]
+ * when that value is non-zero and not larger than num_irqs, and num_irqs
+ * otherwise.
+ *
+ * Returns the identity placed both at IMSIC_TOPEI_IID_SHIFT and in the
+ * low bits, or 0 when no identity qualifies.
+ */
+static uint32_t riscv_imsic_topei(RISCVIMSICState *imsic, uint32_t page)
+{
+    uint32_t threshold = imsic->eithreshold[page];
+    uint32_t limit = imsic->num_irqs;
+    uint32_t base = page * imsic->num_irqs;
+    uint32_t id;
+
+    if (threshold && (threshold <= imsic->num_irqs)) {
+        limit = threshold;
+    }
+
+    for (id = 1; id < limit; id++) {
+        if ((imsic->eistate[base + id] & IMSIC_EISTATE_ENPEND) !=
+            IMSIC_EISTATE_ENPEND) {
+            continue;
+        }
+        return (id << IMSIC_TOPEI_IID_SHIFT) | id;
+    }
+
+    return 0;
+}
+
+/*
+ * Recompute the output line of @page: raised when eidelivery[page] is
+ * non-zero and riscv_imsic_topei() reports an interrupt, lowered otherwise.
+ */
+static void riscv_imsic_update(RISCVIMSICState *imsic, uint32_t page)
+{
+    bool deliver = imsic->eidelivery[page] && riscv_imsic_topei(imsic, page);
+
+    if (!deliver) {
+        qemu_irq_lower(imsic->external_irqs[page]);
+        return;
+    }
+
+    qemu_irq_raise(imsic->external_irqs[page]);
+}
+
+/*
+ * Read-modify-write of the eidelivery register of @page.
+ *
+ * The previous value is stored through @val when @val is non-NULL. Only
+ * bit 0 is writable: it takes the value of @new_val where @wr_mask selects
+ * it. The output line is re-evaluated afterwards. Always returns 0.
+ */
+static int riscv_imsic_eidelivery_rmw(RISCVIMSICState *imsic, uint32_t page,
+                                      target_ulong *val,
+                                      target_ulong new_val,
+                                      target_ulong wr_mask)
+{
+    const target_ulong writable = wr_mask & 0x1;
+    const target_ulong prev = imsic->eidelivery[page];
+
+    if (val != NULL) {
+        *val = prev;
+    }
+
+    imsic->eidelivery[page] = (new_val & writable) | (prev & ~writable);
+    riscv_imsic_update(imsic, page);
+
+    return 0;
+}
+
+/*
+ * Read-modify-write of the eithreshold register of @page.
+ *
+ * The previous value is stored through @val when @val is non-NULL. Only
+ * the bits covered by IMSIC_MAX_ID are writable; those selected by
+ * @wr_mask take their value from @new_val. The output line is
+ * re-evaluated afterwards. Always returns 0.
+ */
+static int riscv_imsic_eithreshold_rmw(RISCVIMSICState *imsic, uint32_t page,
+                                       target_ulong *val,
+                                       target_ulong new_val,
+                                       target_ulong wr_mask)
+{
+    const target_ulong writable = wr_mask & IMSIC_MAX_ID;
+    const target_ulong prev = imsic->eithreshold[page];
+
+    if (val != NULL) {
+        *val = prev;
+    }
+
+    imsic->eithreshold[page] = (new_val & writable) | (prev & ~writable);
+    riscv_imsic_update(imsic, page);
+
+    return 0;
+}
+
+/*
+ * Read-modify-write of the topei register of @page.
+ *
+ * A read returns the value computed by riscv_imsic_topei(). Any write
+ * (non-zero @wr_mask) ignores @new_val and, if an interrupt was reported,
+ * clears that interrupt's pending bit and re-evaluates the output line.
+ * Always returns 0.
+ */
+static int riscv_imsic_topei_rmw(RISCVIMSICState *imsic, uint32_t page,
+                                 target_ulong *val, target_ulong new_val,
+                                 target_ulong wr_mask)
+{
+    uint32_t top = riscv_imsic_topei(imsic, page);
+    uint32_t iid;
+
+    /* Report the highest-priority enabled and pending interrupt */
+    if (val) {
+        *val = top;
+    }
+
+    /* Nothing to claim, or this was a pure read */
+    if (!top || !wr_mask) {
+        return 0;
+    }
+
+    iid = top >> IMSIC_TOPEI_IID_SHIFT;
+    if (iid) {
+        imsic->eistate[(page * imsic->num_irqs) + iid] &=
+            ~IMSIC_EISTATE_PENDING;
+    }
+    riscv_imsic_update(imsic, page);
+
+    return 0;
+}
+
+/*
+ * Read-modify-write of one eip<num> (@pend true) or eie<num> (@pend false)
+ * register of @page, each register covering @xlen interrupt identities.
+ *
+ * When @xlen is not 32, odd register numbers are rejected and the number
+ * is halved. Register numbers beyond num_irqs / xlen are rejected as well.
+ * On a read, bit i of *@val reflects the selected flag of identity
+ * num * xlen + i. On a write, bits selected by @wr_mask are set or cleared
+ * from @new_val, except bit 0 of register 0 which is never modified.
+ *
+ * Returns 0 on success, -EINVAL for a rejected register number.
+ */
+static int riscv_imsic_eix_rmw(RISCVIMSICState *imsic,
+                               uint32_t xlen, uint32_t page,
+                               uint32_t num, bool pend, target_ulong *val,
+                               target_ulong new_val, target_ulong wr_mask)
+{
+    uint32_t flag = pend ? IMSIC_EISTATE_PENDING : IMSIC_EISTATE_ENABLED;
+    uint32_t bit, first, base;
+    target_ulong bits;
+
+    if (xlen != 32) {
+        if (num & 0x1) {
+            return -EINVAL;
+        }
+        num >>= 1;
+    }
+    if (num >= (imsic->num_irqs / xlen)) {
+        return -EINVAL;
+    }
+
+    base = (page * imsic->num_irqs) + (num * xlen);
+
+    if (val) {
+        bits = 0;
+        for (bit = 0; bit < xlen; bit++) {
+            if (imsic->eistate[base + bit] & flag) {
+                bits |= (target_ulong)1 << bit;
+            }
+        }
+        *val = bits;
+    }
+
+    /* Bit0 of eip0 and eie0 are read-only zero, so start after it */
+    first = num ? 0 : 1;
+    for (bit = first; bit < xlen; bit++) {
+        target_ulong sel = (target_ulong)1 << bit;
+
+        if (!(wr_mask & sel)) {
+            continue;
+        }
+        if (new_val & sel) {
+            imsic->eistate[base + bit] |= flag;
+        } else {
+            imsic->eistate[base + bit] &= ~flag;
+        }
+    }
+
+    riscv_imsic_update(imsic, page);
+    return 0;
+}
+
+/*
+ * Indirect-register read-modify-write callback, installed on the CPU by
+ * riscv_imsic_realize().
+ *
+ * @reg packs privilege, virtualization, select, VGEIN and XLEN fields that
+ * are unpacked with the AIA_IREG_* macros. An M-mode IMSIC accepts only
+ * non-virtualized M-mode accesses and uses page 0. An S-mode IMSIC accepts
+ * only S-mode accesses: page 0 when not virtualized, page VGEIN when
+ * virtualized (VGEIN must be non-zero and below num_pages).
+ *
+ * Returns the result of the selected register handler, or -EINVAL after
+ * logging a guest error for a rejected access or unknown select value.
+ */
+static int riscv_imsic_rmw(void *arg, target_ulong reg, target_ulong *val,
+                           target_ulong new_val, target_ulong wr_mask)
+{
+    RISCVIMSICState *imsic = arg;
+    uint32_t priv = AIA_IREG_PRIV(reg);
+    uint32_t virt = AIA_IREG_VIRT(reg);
+    uint32_t isel = AIA_IREG_ISEL(reg);
+    uint32_t vgein = AIA_IREG_VGEIN(reg);
+    uint32_t xlen = AIA_IREG_XLEN(reg);
+    uint32_t page;
+
+    if (imsic->mmode) {
+        if (priv != PRV_M || virt) {
+            goto err;
+        }
+        page = 0;
+    } else {
+        if (priv != PRV_S) {
+            goto err;
+        }
+        if (!virt) {
+            page = 0;
+        } else if (vgein && vgein < imsic->num_pages) {
+            page = vgein;
+        } else {
+            goto err;
+        }
+    }
+
+    switch (isel) {
+    case ISELECT_IMSIC_EIDELIVERY:
+        return riscv_imsic_eidelivery_rmw(imsic, page, val,
+                                          new_val, wr_mask);
+    case ISELECT_IMSIC_EITHRESHOLD:
+        return riscv_imsic_eithreshold_rmw(imsic, page, val,
+                                           new_val, wr_mask);
+    case ISELECT_IMSIC_TOPEI:
+        return riscv_imsic_topei_rmw(imsic, page, val, new_val, wr_mask);
+    case ISELECT_IMSIC_EIP0 ... ISELECT_IMSIC_EIP63:
+        return riscv_imsic_eix_rmw(imsic, xlen, page,
+                                   isel - ISELECT_IMSIC_EIP0,
+                                   true, val, new_val, wr_mask);
+    case ISELECT_IMSIC_EIE0 ... ISELECT_IMSIC_EIE63:
+        return riscv_imsic_eix_rmw(imsic, xlen, page,
+                                   isel - ISELECT_IMSIC_EIE0,
+                                   false, val, new_val, wr_mask);
+    default:
+        break;
+    }
+
+err:
+    qemu_log_mask(LOG_GUEST_ERROR,
+                  "%s: Invalid register priv=%d virt=%d isel=%d vgein=%d\n",
+                  __func__, priv, virt, isel, vgein);
+    return -EINVAL;
+}
+
+/*
+ * MMIO read handler. Every read returns 0; a misaligned or out-of-range
+ * offset additionally logs a guest error.
+ */
+static uint64_t riscv_imsic_read(void *opaque, hwaddr addr, unsigned size)
+{
+    RISCVIMSICState *imsic = opaque;
+
+    /* Reads must be 4 byte words */
+    if ((addr & 0x3) != 0) {
+        goto err;
+    }
+
+    /*
+     * Reads cannot be out of range. Valid offsets are 0 .. size - 1, so an
+     * offset equal to the region size is rejected too.
+     */
+    if (addr >= IMSIC_MMIO_SIZE(imsic->num_pages)) {
+        goto err;
+    }
+
+    return 0;
+
+err:
+    qemu_log_mask(LOG_GUEST_ERROR,
+                  "%s: Invalid register read 0x%" HWADDR_PRIx "\n",
+                  __func__, addr);
+    return 0;
+}
+
+/*
+ * MMIO write handler.
+ *
+ * A write of a non-zero value below num_irqs to the little-endian MSI
+ * offset of a page marks that interrupt identity pending on the page.
+ * Any accepted write re-evaluates the page's output line. Misaligned or
+ * out-of-range offsets are logged as guest errors and otherwise ignored.
+ */
+static void riscv_imsic_write(void *opaque, hwaddr addr, uint64_t value,
+                              unsigned size)
+{
+    RISCVIMSICState *imsic = opaque;
+    uint32_t page;
+
+    /* Writes must be 4 byte words */
+    if ((addr & 0x3) != 0) {
+        goto err;
+    }
+
+    /*
+     * Writes cannot be out of range. Valid offsets are 0 .. size - 1; an
+     * offset equal to the region size would select a page one past the
+     * end of the per-page arrays, so it is rejected too.
+     */
+    if (addr >= IMSIC_MMIO_SIZE(imsic->num_pages)) {
+        goto err;
+    }
+
+    /* Writes only supported for MSI little-endian registers */
+    page = addr >> IMSIC_MMIO_PAGE_SHIFT;
+    if ((addr & (IMSIC_MMIO_PAGE_SZ - 1)) == IMSIC_MMIO_PAGE_LE) {
+        if (value && (value < imsic->num_irqs)) {
+            imsic->eistate[(page * imsic->num_irqs) + value] |=
+                IMSIC_EISTATE_PENDING;
+        }
+    }
+
+    /* Update CPU external interrupt status */
+    riscv_imsic_update(imsic, page);
+
+    return;
+
+err:
+    qemu_log_mask(LOG_GUEST_ERROR,
+                  "%s: Invalid register write 0x%" HWADDR_PRIx "\n",
+                  __func__, addr);
+}
+
+/*
+ * MMIO callbacks for the IMSIC region registered in riscv_imsic_realize().
+ * The device is declared little-endian and only 4-byte accesses are
+ * declared valid.
+ */
+static const MemoryRegionOps riscv_imsic_ops = {
+ .read = riscv_imsic_read,
+ .write = riscv_imsic_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 4
+ }
+};
+
+/*
+ * Realize an IMSIC device.
+ *
+ * Allocates the per-page and per-interrupt state arrays, registers the
+ * MMIO region, claims MEIP (M-mode IMSIC) or SEIP (S-mode IMSIC) on the
+ * target hart, creates one output line per page and installs
+ * riscv_imsic_rmw() as the hart's AIA indirect-register callback.
+ *
+ * Sets @errp and returns early when no CPU exists for the configured
+ * hartid or when the interrupt has already been claimed.
+ */
+static void riscv_imsic_realize(DeviceState *dev, Error **errp)
+{
+    RISCVIMSICState *imsic = RISCV_IMSIC(dev);
+    CPUState *cpu = qemu_get_cpu(imsic->hartid);
+    RISCVCPU *rcpu;
+    CPURISCVState *env;
+
+    /*
+     * Both rcpu and env are derived from this CPU; fail cleanly before
+     * allocating anything instead of handing a NULL CPU to the claim call.
+     */
+    if (!cpu) {
+        error_setg(errp, "no CPU found for hartid %" PRIu32, imsic->hartid);
+        return;
+    }
+    rcpu = RISCV_CPU(cpu);
+    env = cpu->env_ptr;
+
+    imsic->num_eistate = imsic->num_pages * imsic->num_irqs;
+    imsic->eidelivery = g_new0(uint32_t, imsic->num_pages);
+    imsic->eithreshold = g_new0(uint32_t, imsic->num_pages);
+    imsic->eistate = g_new0(uint32_t, imsic->num_eistate);
+
+    memory_region_init_io(&imsic->mmio, OBJECT(dev), &riscv_imsic_ops,
+                          imsic, TYPE_RISCV_IMSIC,
+                          IMSIC_MMIO_SIZE(imsic->num_pages));
+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &imsic->mmio);
+
+    /* Claim the CPU interrupt to be triggered by this IMSIC */
+    if (riscv_cpu_claim_interrupts(rcpu,
+            (imsic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
+        error_setg(errp, "%s already claimed",
+                   (imsic->mmode) ? "MEIP" : "SEIP");
+        return;
+    }
+
+    /* Create output IRQ lines */
+    imsic->external_irqs = g_new(qemu_irq, imsic->num_pages);
+    qdev_init_gpio_out(dev, imsic->external_irqs, imsic->num_pages);
+
+    /* Force select AIA feature and setup CSR read-modify-write callback */
+    if (env) {
+        riscv_set_feature(env, RISCV_FEATURE_AIA);
+        if (!imsic->mmode) {
+            riscv_cpu_set_geilen(env, imsic->num_pages - 1);
+        }
+        riscv_cpu_set_aia_ireg_rmw_fn(env, (imsic->mmode) ? PRV_M : PRV_S,
+                                      riscv_imsic_rmw, imsic);
+    }
+
+    msi_nonbroken = true;
+}
+
+/*
+ * qdev properties of the IMSIC device, all defaulting to 0.
+ * riscv_imsic_create() sets each of them before realizing the device;
+ * "num-irqs" is set there to num_ids + 1.
+ */
+static Property riscv_imsic_properties[] = {
+ DEFINE_PROP_BOOL("mmode", RISCVIMSICState, mmode, 0),
+ DEFINE_PROP_UINT32("hartid", RISCVIMSICState, hartid, 0),
+ DEFINE_PROP_UINT32("num-pages", RISCVIMSICState, num_pages, 0),
+ DEFINE_PROP_UINT32("num-irqs", RISCVIMSICState, num_irqs, 0),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Migration description: the three uint32_t arrays allocated in
+ * riscv_imsic_realize(). eidelivery and eithreshold are sized by
+ * num_pages, eistate by num_eistate.
+ */
+static const VMStateDescription vmstate_riscv_imsic = {
+ .name = "riscv_imsic",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_VARRAY_UINT32(eidelivery, RISCVIMSICState,
+ num_pages, 0,
+ vmstate_info_uint32, uint32_t),
+ VMSTATE_VARRAY_UINT32(eithreshold, RISCVIMSICState,
+ num_pages, 0,
+ vmstate_info_uint32, uint32_t),
+ VMSTATE_VARRAY_UINT32(eistate, RISCVIMSICState,
+ num_eistate, 0,
+ vmstate_info_uint32, uint32_t),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/*
+ * Class initializer: hook up the realize callback, the migration
+ * description and the property table of the IMSIC device class.
+ */
+static void riscv_imsic_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    device_class->realize = riscv_imsic_realize;
+    device_class->vmsd = &vmstate_riscv_imsic;
+    device_class_set_props(device_class, riscv_imsic_properties);
+}
+
+/*
+ * QOM type record for the IMSIC: a sysbus device whose instances are
+ * RISCVIMSICState structures, initialized by riscv_imsic_class_init().
+ */
+static const TypeInfo riscv_imsic_info = {
+ .name = TYPE_RISCV_IMSIC,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(RISCVIMSICState),
+ .class_init = riscv_imsic_class_init,
+};
+
+/* Register the IMSIC type record; hooked in through type_init() below. */
+static void riscv_imsic_register_types(void)
+{
+ type_register_static(&riscv_imsic_info);
+}
+
+type_init(riscv_imsic_register_types)
+
+/*
+ * Create, realize and map an IMSIC device at @addr for hart @hartid.
+ *
+ * @addr must be aligned to IMSIC_MMIO_PAGE_SZ. An M-mode IMSIC must have
+ * exactly one page; an S-mode IMSIC between 1 and IRQ_LOCAL_GUEST_MAX + 1
+ * pages. @num_ids must lie in [IMSIC_MIN_ID, IMSIC_MAX_ID] and have all
+ * bits of IMSIC_MIN_ID set. Violations trip an assertion.
+ *
+ * Output line 0 is wired to the hart's M- or S-mode external interrupt
+ * input; output line i > 0 is wired to input IRQ_LOCAL_MAX + i - 1.
+ *
+ * Returns the realized device. Realization failure is fatal.
+ */
+DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode,
+                                uint32_t num_pages, uint32_t num_ids)
+{
+    DeviceState *dev = qdev_new(TYPE_RISCV_IMSIC);
+    CPUState *cpu = qemu_get_cpu(hartid);
+    uint32_t page;
+
+    assert((addr & (IMSIC_MMIO_PAGE_SZ - 1)) == 0);
+    if (!mmode) {
+        assert(num_pages >= 1 && num_pages <= (IRQ_LOCAL_GUEST_MAX + 1));
+    } else {
+        assert(num_pages == 1);
+    }
+    assert(num_ids >= IMSIC_MIN_ID);
+    assert(num_ids <= IMSIC_MAX_ID);
+    assert((num_ids & IMSIC_MIN_ID) == IMSIC_MIN_ID);
+
+    qdev_prop_set_bit(dev, "mmode", mmode);
+    qdev_prop_set_uint32(dev, "hartid", hartid);
+    qdev_prop_set_uint32(dev, "num-pages", num_pages);
+    qdev_prop_set_uint32(dev, "num-irqs", num_ids + 1);
+
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
+
+    for (page = 0; page < num_pages; page++) {
+        int cpu_irq;
+
+        if (page == 0) {
+            cpu_irq = mmode ? IRQ_M_EXT : IRQ_S_EXT;
+        } else {
+            cpu_irq = IRQ_LOCAL_MAX + page - 1;
+        }
+        qdev_connect_gpio_out_named(dev, NULL, page,
+                                    qdev_get_gpio_in(DEVICE(cpu), cpu_irq));
+    }
+
+    return dev;
+}
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index b28cda4e08..53414aa197 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -176,6 +176,27 @@ gicv3_its_write(uint64_t offset, uint64_t data, unsigned size) "GICv3 ITS write:
gicv3_its_badwrite(uint64_t offset, uint64_t data, unsigned size) "GICv3 ITS write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u: error"
gicv3_its_translation_write(uint64_t offset, uint64_t data, unsigned size, uint32_t requester_id) "GICv3 ITS TRANSLATER write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u requester_id 0x%x"
gicv3_its_process_command(uint32_t rd_offset, uint8_t cmd) "GICv3 ITS: processing command at offset 0x%x: 0x%x"
+gicv3_its_cmd_int(uint32_t devid, uint32_t eventid) "GICv3 ITS: command INT DeviceID 0x%x EventID 0x%x"
+gicv3_its_cmd_clear(uint32_t devid, uint32_t eventid) "GICv3 ITS: command CLEAR DeviceID 0x%x EventID 0x%x"
+gicv3_its_cmd_discard(uint32_t devid, uint32_t eventid) "GICv3 ITS: command DISCARD DeviceID 0x%x EventID 0x%x"
+gicv3_its_cmd_sync(void) "GICv3 ITS: command SYNC"
+gicv3_its_cmd_mapd(uint32_t devid, uint32_t size, uint64_t ittaddr, int valid) "GICv3 ITS: command MAPD DeviceID 0x%x Size 0x%x ITT_addr 0x%" PRIx64 " V %d"
+gicv3_its_cmd_mapc(uint32_t icid, uint64_t rdbase, int valid) "GICv3 ITS: command MAPC ICID 0x%x RDbase 0x%" PRIx64 " V %d"
+gicv3_its_cmd_mapi(uint32_t devid, uint32_t eventid, uint32_t icid) "GICv3 ITS: command MAPI DeviceID 0x%x EventID 0x%x ICID 0x%x"
+gicv3_its_cmd_mapti(uint32_t devid, uint32_t eventid, uint32_t icid, uint32_t intid) "GICv3 ITS: command MAPTI DeviceID 0x%x EventID 0x%x ICID 0x%x pINTID 0x%x"
+gicv3_its_cmd_inv(void) "GICv3 ITS: command INV or INVALL"
+gicv3_its_cmd_movall(uint64_t rd1, uint64_t rd2) "GICv3 ITS: command MOVALL RDbase1 0x%" PRIx64 " RDbase2 0x%" PRIx64
+gicv3_its_cmd_movi(uint32_t devid, uint32_t eventid, uint32_t icid) "GICv3 ITS: command MOVI DeviceID 0x%x EventID 0x%x ICID 0x%x"
+gicv3_its_cmd_unknown(unsigned cmd) "GICv3 ITS: unknown command 0x%x"
+gicv3_its_cte_read(uint32_t icid, int valid, uint32_t rdbase) "GICv3 ITS: Collection Table read for ICID 0x%x: valid %d RDBase 0x%x"
+gicv3_its_cte_write(uint32_t icid, int valid, uint32_t rdbase) "GICv3 ITS: Collection Table write for ICID 0x%x: valid %d RDBase 0x%x"
+gicv3_its_cte_read_fault(uint32_t icid) "GICv3 ITS: Collection Table read for ICID 0x%x: faulted"
+gicv3_its_ite_read(uint64_t ittaddr, uint32_t eventid, int valid, int inttype, uint32_t intid, uint32_t icid, uint32_t vpeid, uint32_t doorbell) "GICv3 ITS: Interrupt Table read for ITTaddr 0x%" PRIx64 " EventID 0x%x: valid %d inttype %d intid 0x%x ICID 0x%x vPEID 0x%x doorbell 0x%x"
+gicv3_its_ite_read_fault(uint64_t ittaddr, uint32_t eventid) "GICv3 ITS: Interrupt Table read for ITTaddr 0x%" PRIx64 " EventID 0x%x: faulted"
+gicv3_its_ite_write(uint64_t ittaddr, uint32_t eventid, int valid, int inttype, uint32_t intid, uint32_t icid, uint32_t vpeid, uint32_t doorbell) "GICv3 ITS: Interrupt Table write for ITTaddr 0x%" PRIx64 " EventID 0x%x: valid %d inttype %d intid 0x%x ICID 0x%x vPEID 0x%x doorbell 0x%x"
+gicv3_its_dte_read(uint32_t devid, int valid, uint32_t size, uint64_t ittaddr) "GICv3 ITS: Device Table read for DeviceID 0x%x: valid %d size 0x%x ITTaddr 0x%" PRIx64
+gicv3_its_dte_write(uint32_t devid, int valid, uint32_t size, uint64_t ittaddr) "GICv3 ITS: Device Table write for DeviceID 0x%x: valid %d size 0x%x ITTaddr 0x%" PRIx64
+gicv3_its_dte_read_fault(uint32_t devid) "GICv3 ITS: Device Table read for DeviceID 0x%x: faulted"
# armv7m_nvic.c
nvic_recompute_state(int vectpending, int vectpending_prio, int exception_prio) "NVIC state recomputed: vectpending %d vectpending_prio %d exception_prio %d"
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 98aac98bef..03760ddeae 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -163,6 +163,7 @@
#include "migration/vmstate.h"
#include "nvme.h"
+#include "dif.h"
#include "trace.h"
#define NVME_MAX_IOQPAIRS 0xffff
@@ -195,6 +196,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = {
[NVME_WRITE_ATOMICITY] = true,
[NVME_ASYNCHRONOUS_EVENT_CONF] = true,
[NVME_TIMESTAMP] = true,
+ [NVME_HOST_BEHAVIOR_SUPPORT] = true,
[NVME_COMMAND_SET_PROFILE] = true,
};
@@ -205,6 +207,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
[NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
[NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE,
[NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE,
+ [NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE,
[NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE,
};
@@ -1065,7 +1068,8 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req)
size_t len = nvme_l2b(ns, nlb);
uint16_t status;
- if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) {
+ if (nvme_ns_ext(ns) &&
+ !(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
NvmeSg sg;
len += nvme_m2b(ns, nlb);
@@ -1244,7 +1248,8 @@ uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps);
bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT);
- if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) {
+ if (nvme_ns_ext(ns) &&
+ !(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz,
ns->lbaf.ms, 0, dir);
}
@@ -2045,9 +2050,12 @@ static void nvme_verify_cb(void *opaque, int ret)
uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
uint16_t apptag = le16_to_cpu(rw->apptag);
uint16_t appmask = le16_to_cpu(rw->appmask);
- uint32_t reftag = le32_to_cpu(rw->reftag);
+ uint64_t reftag = le32_to_cpu(rw->reftag);
+ uint64_t cdw3 = le32_to_cpu(rw->cdw3);
uint16_t status;
+ reftag |= cdw3 << 32;
+
trace_pci_nvme_verify_cb(nvme_cid(req), prinfo, apptag, appmask, reftag);
if (ret) {
@@ -2136,7 +2144,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
uint16_t apptag = le16_to_cpu(rw->apptag);
uint16_t appmask = le16_to_cpu(rw->appmask);
- uint32_t reftag = le32_to_cpu(rw->reftag);
+ uint64_t reftag = le32_to_cpu(rw->reftag);
+ uint64_t cdw3 = le32_to_cpu(rw->cdw3);
struct nvme_compare_ctx *ctx = req->opaque;
g_autofree uint8_t *buf = NULL;
BlockBackend *blk = ns->blkconf.blk;
@@ -2144,6 +2153,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
BlockAcctStats *stats = blk_get_stats(blk);
uint16_t status = NVME_SUCCESS;
+ reftag |= cdw3 << 32;
+
trace_pci_nvme_compare_mdata_cb(nvme_cid(req));
if (ret) {
@@ -2181,7 +2192,7 @@ static void nvme_compare_mdata_cb(void *opaque, int ret)
* tuple.
*/
if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
- pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+ pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
}
for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms, mbufp += ns->lbaf.ms) {
@@ -2522,7 +2533,8 @@ typedef struct NvmeCopyAIOCB {
QEMUBH *bh;
int ret;
- NvmeCopySourceRange *ranges;
+ void *ranges;
+ unsigned int format;
int nr;
int idx;
@@ -2533,7 +2545,7 @@ typedef struct NvmeCopyAIOCB {
BlockAcctCookie write;
} acct;
- uint32_t reftag;
+ uint64_t reftag;
uint64_t slba;
NvmeZone *zone;
@@ -2587,13 +2599,101 @@ static void nvme_copy_bh(void *opaque)
static void nvme_copy_cb(void *opaque, int ret);
+/*
+ * Decode entry @idx of a format 0 copy source range array.
+ *
+ * Every output pointer is optional; a NULL pointer skips that field.
+ * @nlb receives the stored 16-bit value plus one, @reftag the stored
+ * 32-bit value widened to 64 bits.
+ */
+static void nvme_copy_source_range_parse_format0(void *ranges, int idx,
+                                                 uint64_t *slba, uint32_t *nlb,
+                                                 uint16_t *apptag,
+                                                 uint16_t *appmask,
+                                                 uint64_t *reftag)
+{
+    NvmeCopySourceRangeFormat0 *range =
+        (NvmeCopySourceRangeFormat0 *)ranges + idx;
+
+    if (slba != NULL) {
+        *slba = le64_to_cpu(range->slba);
+    }
+
+    if (nlb != NULL) {
+        *nlb = le16_to_cpu(range->nlb) + 1;
+    }
+
+    if (apptag != NULL) {
+        *apptag = le16_to_cpu(range->apptag);
+    }
+
+    if (appmask != NULL) {
+        *appmask = le16_to_cpu(range->appmask);
+    }
+
+    if (reftag != NULL) {
+        *reftag = le32_to_cpu(range->reftag);
+    }
+}
+
+/*
+ * Decode entry @idx of a format 1 copy source range array.
+ *
+ * Every output pointer is optional; a NULL pointer skips that field.
+ * @nlb receives the stored 16-bit value plus one. @reftag is assembled
+ * from bytes sr[4]..sr[9], most significant byte first (48 bits).
+ */
+static void nvme_copy_source_range_parse_format1(void *ranges, int idx,
+                                                 uint64_t *slba, uint32_t *nlb,
+                                                 uint16_t *apptag,
+                                                 uint16_t *appmask,
+                                                 uint64_t *reftag)
+{
+    NvmeCopySourceRangeFormat1 *range =
+        (NvmeCopySourceRangeFormat1 *)ranges + idx;
+
+    if (slba != NULL) {
+        *slba = le64_to_cpu(range->slba);
+    }
+
+    if (nlb != NULL) {
+        *nlb = le16_to_cpu(range->nlb) + 1;
+    }
+
+    if (apptag != NULL) {
+        *apptag = le16_to_cpu(range->apptag);
+    }
+
+    if (appmask != NULL) {
+        *appmask = le16_to_cpu(range->appmask);
+    }
+
+    if (reftag != NULL) {
+        uint64_t value = 0;
+        int i;
+
+        /* sr[4] lands in bits 47..40, sr[9] in bits 7..0 */
+        for (i = 0; i < 6; i++) {
+            value |= (uint64_t)range->sr[4 + i] << (40 - 8 * i);
+        }
+
+        *reftag = value;
+    }
+}
+
+/*
+ * Decode entry @idx of a copy source range array laid out according to
+ * @format, forwarding to the matching per-format parser. Output pointers
+ * are passed through unchanged. Any format other than NVME_COPY_FORMAT_0
+ * or NVME_COPY_FORMAT_1 aborts.
+ */
+static void nvme_copy_source_range_parse(void *ranges, int idx, uint8_t format,
+                                         uint64_t *slba, uint32_t *nlb,
+                                         uint16_t *apptag, uint16_t *appmask,
+                                         uint64_t *reftag)
+{
+    if (format == NVME_COPY_FORMAT_0) {
+        nvme_copy_source_range_parse_format0(ranges, idx, slba, nlb, apptag,
+                                             appmask, reftag);
+    } else if (format == NVME_COPY_FORMAT_1) {
+        nvme_copy_source_range_parse_format1(ranges, idx, slba, nlb, apptag,
+                                             appmask, reftag);
+    } else {
+        abort();
+    }
+}
+
static void nvme_copy_out_completed_cb(void *opaque, int ret)
{
NvmeCopyAIOCB *iocb = opaque;
NvmeRequest *req = iocb->req;
NvmeNamespace *ns = req->ns;
- NvmeCopySourceRange *range = &iocb->ranges[iocb->idx];
- uint32_t nlb = le32_to_cpu(range->nlb) + 1;
+ uint32_t nlb;
+
+ nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL,
+ &nlb, NULL, NULL, NULL);
if (ret < 0) {
iocb->ret = ret;
@@ -2617,7 +2717,6 @@ static void nvme_copy_out_cb(void *opaque, int ret)
NvmeCopyAIOCB *iocb = opaque;
NvmeRequest *req = iocb->req;
NvmeNamespace *ns = req->ns;
- NvmeCopySourceRange *range;
uint32_t nlb;
size_t mlen;
uint8_t *mbounce;
@@ -2634,8 +2733,8 @@ static void nvme_copy_out_cb(void *opaque, int ret)
return;
}
- range = &iocb->ranges[iocb->idx];
- nlb = le32_to_cpu(range->nlb) + 1;
+ nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL,
+ &nlb, NULL, NULL, NULL);
mlen = nvme_m2b(ns, nlb);
mbounce = iocb->bounce + nvme_l2b(ns, nlb);
@@ -2658,8 +2757,10 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret)
NvmeCopyAIOCB *iocb = opaque;
NvmeRequest *req = iocb->req;
NvmeNamespace *ns = req->ns;
- NvmeCopySourceRange *range;
uint32_t nlb;
+ uint64_t slba;
+ uint16_t apptag, appmask;
+ uint64_t reftag;
size_t len;
uint16_t status;
@@ -2670,8 +2771,8 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret)
goto out;
}
- range = &iocb->ranges[iocb->idx];
- nlb = le32_to_cpu(range->nlb) + 1;
+ nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
+ &nlb, &apptag, &appmask, &reftag);
len = nvme_l2b(ns, nlb);
trace_pci_nvme_copy_out(iocb->slba, nlb);
@@ -2682,11 +2783,6 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret)
uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
- uint16_t apptag = le16_to_cpu(range->apptag);
- uint16_t appmask = le16_to_cpu(range->appmask);
- uint32_t reftag = le32_to_cpu(range->reftag);
-
- uint64_t slba = le64_to_cpu(range->slba);
size_t mlen = nvme_m2b(ns, nlb);
uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb);
@@ -2759,7 +2855,6 @@ static void nvme_copy_in_cb(void *opaque, int ret)
NvmeCopyAIOCB *iocb = opaque;
NvmeRequest *req = iocb->req;
NvmeNamespace *ns = req->ns;
- NvmeCopySourceRange *range;
uint64_t slba;
uint32_t nlb;
@@ -2775,9 +2870,8 @@ static void nvme_copy_in_cb(void *opaque, int ret)
return;
}
- range = &iocb->ranges[iocb->idx];
- slba = le64_to_cpu(range->slba);
- nlb = le32_to_cpu(range->nlb) + 1;
+ nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
+ &nlb, NULL, NULL, NULL);
qemu_iovec_reset(&iocb->iov);
qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(ns, nlb),
@@ -2797,7 +2891,6 @@ static void nvme_copy_cb(void *opaque, int ret)
NvmeCopyAIOCB *iocb = opaque;
NvmeRequest *req = iocb->req;
NvmeNamespace *ns = req->ns;
- NvmeCopySourceRange *range;
uint64_t slba;
uint32_t nlb;
size_t len;
@@ -2814,9 +2907,8 @@ static void nvme_copy_cb(void *opaque, int ret)
goto done;
}
- range = &iocb->ranges[iocb->idx];
- slba = le64_to_cpu(range->slba);
- nlb = le32_to_cpu(range->nlb) + 1;
+ nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba,
+ &nlb, NULL, NULL, NULL);
len = nvme_l2b(ns, nlb);
trace_pci_nvme_copy_source_range(slba, nlb);
@@ -2872,6 +2964,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
uint8_t format = copy->control[0] & 0xf;
uint16_t prinfor = ((copy->control[0] >> 4) & 0xf);
uint16_t prinfow = ((copy->control[2] >> 2) & 0xf);
+ size_t len = sizeof(NvmeCopySourceRangeFormat0);
uint16_t status;
@@ -2897,10 +2990,18 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
goto invalid;
}
- iocb->ranges = g_new(NvmeCopySourceRange, nr);
+ if (ns->pif && format != 0x1) {
+ status = NVME_INVALID_FORMAT | NVME_DNR;
+ goto invalid;
+ }
- status = nvme_h2c(n, (uint8_t *)iocb->ranges,
- sizeof(NvmeCopySourceRange) * nr, req);
+ if (ns->pif) {
+ len = sizeof(NvmeCopySourceRangeFormat1);
+ }
+
+ iocb->format = format;
+ iocb->ranges = g_malloc_n(nr, len);
+ status = nvme_h2c(n, (uint8_t *)iocb->ranges, len * nr, req);
if (status) {
goto invalid;
}
@@ -2926,6 +3027,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
iocb->nr = nr;
iocb->idx = 0;
iocb->reftag = le32_to_cpu(copy->reftag);
+ iocb->reftag |= (uint64_t)le32_to_cpu(copy->cdw3) << 32;
iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl),
ns->lbasz + ns->lbaf.ms);
@@ -3164,7 +3266,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req)
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
bool pract = prinfo & NVME_PRINFO_PRACT;
- if (pract && ns->lbaf.ms == 8) {
+ if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
mapped_size = data_size;
}
}
@@ -3241,7 +3343,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append,
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
bool pract = prinfo & NVME_PRINFO_PRACT;
- if (pract && ns->lbaf.ms == 8) {
+ if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
mapped_size -= nvme_m2b(ns, nlb);
}
}
@@ -4712,7 +4814,8 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
}
if (c->csi == NVME_CSI_NVM) {
- return nvme_rpt_empty_id_struct(n, req);
+ return nvme_c2h(n, (uint8_t *)&ns->id_ns_nvm, sizeof(NvmeIdNsNvm),
+ req);
} else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) {
return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned),
req);
@@ -5090,6 +5193,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
goto out;
case NVME_TIMESTAMP:
return nvme_get_feature_timestamp(n, req);
+ case NVME_HOST_BEHAVIOR_SUPPORT:
+ return nvme_c2h(n, (uint8_t *)&n->features.hbs,
+ sizeof(n->features.hbs), req);
default:
break;
}
@@ -5159,6 +5265,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
uint32_t nsid = le32_to_cpu(cmd->nsid);
uint8_t fid = NVME_GETSETFEAT_FID(dw10);
uint8_t save = NVME_SETFEAT_SAVE(dw10);
+ uint16_t status;
int i;
trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11);
@@ -5280,6 +5387,27 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
break;
case NVME_TIMESTAMP:
return nvme_set_feature_timestamp(n, req);
+ case NVME_HOST_BEHAVIOR_SUPPORT:
+ status = nvme_h2c(n, (uint8_t *)&n->features.hbs,
+ sizeof(n->features.hbs), req);
+ if (status) {
+ return status;
+ }
+
+ for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+ ns = nvme_ns(n, i);
+
+ if (!ns) {
+ continue;
+ }
+
+ ns->id_ns.nlbaf = ns->nlbaf - 1;
+ if (!n->features.hbs.lbafee) {
+ ns->id_ns.nlbaf = MIN(ns->id_ns.nlbaf, 15);
+ }
+ }
+
+ return status;
case NVME_COMMAND_SET_PROFILE:
if (dw11 & 0x1ff) {
trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff);
@@ -5443,6 +5571,11 @@ typedef struct NvmeFormatAIOCB {
uint32_t nsid;
bool broadcast;
int64_t offset;
+
+ uint8_t lbaf;
+ uint8_t mset;
+ uint8_t pi;
+ uint8_t pil;
} NvmeFormatAIOCB;
static void nvme_format_bh(void *opaque);
@@ -5462,18 +5595,16 @@ static const AIOCBInfo nvme_format_aiocb_info = {
.get_aio_context = nvme_get_aio_context,
};
-static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd)
+static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset,
+ uint8_t pi, uint8_t pil)
{
- uint32_t dw10 = le32_to_cpu(cmd->cdw10);
- uint8_t lbaf = dw10 & 0xf;
- uint8_t pi = (dw10 >> 5) & 0x7;
- uint8_t mset = (dw10 >> 4) & 0x1;
- uint8_t pil = (dw10 >> 8) & 0x1;
+ uint8_t lbafl = lbaf & 0xf;
+ uint8_t lbafu = lbaf >> 4;
trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil);
ns->id_ns.dps = (pil << 3) | pi;
- ns->id_ns.flbas = lbaf | (mset << 4);
+ ns->id_ns.flbas = (lbafu << 5) | (mset << 4) | lbafl;
nvme_ns_init_format(ns);
}
@@ -5481,7 +5612,6 @@ static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd)
static void nvme_format_ns_cb(void *opaque, int ret)
{
NvmeFormatAIOCB *iocb = opaque;
- NvmeRequest *req = iocb->req;
NvmeNamespace *ns = iocb->ns;
int bytes;
@@ -5503,7 +5633,7 @@ static void nvme_format_ns_cb(void *opaque, int ret)
return;
}
- nvme_format_set(ns, &req->cmd);
+ nvme_format_set(ns, iocb->lbaf, iocb->mset, iocb->pi, iocb->pil);
ns->status = 0x0;
iocb->ns = NULL;
iocb->offset = 0;
@@ -5523,7 +5653,7 @@ static uint16_t nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi)
return NVME_INVALID_FORMAT | NVME_DNR;
}
- if (pi && (ns->id_ns.lbaf[lbaf].ms < sizeof(NvmeDifTuple))) {
+ if (pi && (ns->id_ns.lbaf[lbaf].ms < nvme_pi_tuple_size(ns))) {
return NVME_INVALID_FORMAT | NVME_DNR;
}
@@ -5586,6 +5716,12 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req)
{
NvmeFormatAIOCB *iocb;
uint32_t nsid = le32_to_cpu(req->cmd.nsid);
+ uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
+ uint8_t lbaf = dw10 & 0xf;
+ uint8_t mset = (dw10 >> 4) & 0x1;
+ uint8_t pi = (dw10 >> 5) & 0x7;
+ uint8_t pil = (dw10 >> 8) & 0x1;
+ uint8_t lbafu = (dw10 >> 12) & 0x3;
uint16_t status;
iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req);
@@ -5595,9 +5731,17 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req)
iocb->ret = 0;
iocb->ns = NULL;
iocb->nsid = 0;
+ iocb->lbaf = lbaf;
+ iocb->mset = mset;
+ iocb->pi = pi;
+ iocb->pil = pil;
iocb->broadcast = (nsid == NVME_NSID_BROADCAST);
iocb->offset = 0;
+ if (n->features.hbs.lbafee) {
+ iocb->lbaf |= lbafu << 4;
+ }
+
if (!iocb->broadcast) {
if (!nvme_nsid_valid(n, nsid)) {
status = NVME_INVALID_NSID | NVME_DNR;
@@ -6573,6 +6717,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->cntlid = cpu_to_le16(n->cntlid);
id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR);
+ id->ctratt |= cpu_to_le32(NVME_CTRATT_ELBAS);
id->rab = 6;
@@ -6627,7 +6772,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
*/
id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT;
- id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0);
+ id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1);
id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
NVME_CTRL_SGLS_BITBUCKET);
diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c
index 5dbd18b2a4..62d885f83e 100644
--- a/hw/nvme/dif.c
+++ b/hw/nvme/dif.c
@@ -13,13 +13,16 @@
#include "sysemu/block-backend.h"
#include "nvme.h"
+#include "dif.h"
#include "trace.h"
uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
- uint32_t reftag)
+ uint64_t reftag)
{
+ uint64_t mask = ns->pif ? 0xffffffffffff : 0xffffffff;
+
if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) &&
- (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) {
+ (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & mask) != reftag) {
return NVME_INVALID_PROT_INFO | NVME_DNR;
}
@@ -27,43 +30,58 @@ uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
}
/* from Linux kernel (crypto/crct10dif_common.c) */
-static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer,
- size_t len)
+static uint16_t crc16_t10dif(uint16_t crc, const unsigned char *buffer,
+ size_t len)
{
unsigned int i;
for (i = 0; i < len; i++) {
- crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
+ crc = (crc << 8) ^ crc16_t10dif_table[((crc >> 8) ^ buffer[i]) & 0xff];
}
return crc;
}
-void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
- uint8_t *mbuf, size_t mlen, uint16_t apptag,
- uint32_t *reftag)
+/*
+ * from Linux kernel (lib/crc64.c)
+ *
+ * Table-driven CRC64 over @len bytes of @buffer. @crc is used directly as
+ * the starting register value (it is not inverted on entry) and the final
+ * register value is returned bit-inverted. To continue a running CRC over
+ * a second buffer the caller therefore has to pass the complement of the
+ * previously returned value.
+ */
+static uint64_t crc64_nvme(uint64_t crc, const unsigned char *buffer,
+                           size_t len)
+{
+    while (len-- > 0) {
+        crc = crc64_nvme_table[(crc & 0xff) ^ *buffer++] ^ (crc >> 8);
+    }
+
+    return ~crc;
+}
+
+static void nvme_dif_pract_generate_dif_crc16(NvmeNamespace *ns, uint8_t *buf,
+ size_t len, uint8_t *mbuf,
+ size_t mlen, uint16_t apptag,
+ uint64_t *reftag)
{
uint8_t *end = buf + len;
int16_t pil = 0;
if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
- pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+ pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
}
- trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil,
- apptag, *reftag);
+ trace_pci_nvme_dif_pract_generate_dif_crc16(len, ns->lbasz,
+ ns->lbasz + pil, apptag,
+ *reftag);
for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
- uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);
+ uint16_t crc = crc16_t10dif(0x0, buf, ns->lbasz);
if (pil) {
- crc = crc_t10dif(crc, mbuf, pil);
+ crc = crc16_t10dif(crc, mbuf, pil);
}
- dif->guard = cpu_to_be16(crc);
- dif->apptag = cpu_to_be16(apptag);
- dif->reftag = cpu_to_be32(*reftag);
+ dif->g16.guard = cpu_to_be16(crc);
+ dif->g16.apptag = cpu_to_be16(apptag);
+ dif->g16.reftag = cpu_to_be32(*reftag);
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
(*reftag)++;
@@ -71,57 +89,114 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
}
}
-static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
- uint8_t *buf, uint8_t *mbuf, size_t pil,
- uint8_t prinfo, uint16_t apptag,
- uint16_t appmask, uint32_t reftag)
+/*
+ * Generate 64-bit guard protection information for every logical block in
+ * @buf and store it in the matching metadata slot of @mbuf.
+ *
+ * @buf/@len:  data, processed in steps of ns->lbasz
+ * @mbuf:      metadata, advanced in steps of ns->lbaf.ms
+ * @mlen:      unused here
+ * @apptag:    application tag written to every tuple
+ * @reftag:    in/out reference tag; its low 48 bits are written to each
+ *             tuple and it is incremented per block unless the namespace
+ *             uses protection type 3
+ *
+ * Unless NVME_ID_NS_DPS_FIRST_EIGHT is set, the 16-byte tuple sits at the
+ * end of the metadata and the guard also covers the metadata bytes that
+ * precede it.
+ */
+static void nvme_dif_pract_generate_dif_crc64(NvmeNamespace *ns, uint8_t *buf,
+                                              size_t len, uint8_t *mbuf,
+                                              size_t mlen, uint16_t apptag,
+                                              uint64_t *reftag)
+{
+    uint8_t *end = buf + len;
+    int16_t pil = 0;
+
+    if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
+        /* 16 == guard (8) + apptag (2) + sr (6) */
+        pil = ns->lbaf.ms - 16;
+    }
+
+    trace_pci_nvme_dif_pract_generate_dif_crc64(len, ns->lbasz,
+                                                ns->lbasz + pil, apptag,
+                                                *reftag);
+
+    for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
+        NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
+        uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz);
+
+        if (pil) {
+            /*
+             * crc64_nvme() returns the inverted register and takes the
+             * raw register as seed, so undo the inversion before
+             * continuing the CRC over the leading metadata bytes.
+             */
+            crc = crc64_nvme(~crc, mbuf, pil);
+        }
+
+        dif->g64.guard = cpu_to_be64(crc);
+        dif->g64.apptag = cpu_to_be16(apptag);
+
+        /* 48-bit reference tag, most significant byte first */
+        dif->g64.sr[0] = *reftag >> 40;
+        dif->g64.sr[1] = *reftag >> 32;
+        dif->g64.sr[2] = *reftag >> 24;
+        dif->g64.sr[3] = *reftag >> 16;
+        dif->g64.sr[4] = *reftag >> 8;
+        dif->g64.sr[5] = *reftag;
+
+        if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
+            (*reftag)++;
+        }
+    }
+}
+
+/*
+ * Generate protection information for the blocks in @buf, forwarding to
+ * the 16-bit or 64-bit guard implementation according to ns->pif. All
+ * arguments are passed through unchanged. Any other ns->pif value aborts.
+ */
+void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
+                                 uint8_t *mbuf, size_t mlen, uint16_t apptag,
+                                 uint64_t *reftag)
+{
+    switch (ns->pif) {
+    case NVME_PI_GUARD_16:
+        nvme_dif_pract_generate_dif_crc16(ns, buf, len, mbuf, mlen, apptag,
+                                          reftag);
+        return;
+
+    case NVME_PI_GUARD_64:
+        nvme_dif_pract_generate_dif_crc64(ns, buf, len, mbuf, mlen, apptag,
+                                          reftag);
+        return;
+
+    default:
+        abort();
+    }
+}
+
+static uint16_t nvme_dif_prchk_crc16(NvmeNamespace *ns, NvmeDifTuple *dif,
+ uint8_t *buf, uint8_t *mbuf, size_t pil,
+ uint8_t prinfo, uint16_t apptag,
+ uint16_t appmask, uint64_t reftag)
{
switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
case NVME_ID_NS_DPS_TYPE_3:
- if (be32_to_cpu(dif->reftag) != 0xffffffff) {
+ if (be32_to_cpu(dif->g16.reftag) != 0xffffffff) {
break;
}
/* fallthrough */
case NVME_ID_NS_DPS_TYPE_1:
case NVME_ID_NS_DPS_TYPE_2:
- if (be16_to_cpu(dif->apptag) != 0xffff) {
+ if (be16_to_cpu(dif->g16.apptag) != 0xffff) {
break;
}
- trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag),
- be32_to_cpu(dif->reftag));
+ trace_pci_nvme_dif_prchk_disabled_crc16(be16_to_cpu(dif->g16.apptag),
+ be32_to_cpu(dif->g16.reftag));
return NVME_SUCCESS;
}
if (prinfo & NVME_PRINFO_PRCHK_GUARD) {
- uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz);
+ uint16_t crc = crc16_t10dif(0x0, buf, ns->lbasz);
if (pil) {
- crc = crc_t10dif(crc, mbuf, pil);
+ crc = crc16_t10dif(crc, mbuf, pil);
}
- trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc);
+ trace_pci_nvme_dif_prchk_guard_crc16(be16_to_cpu(dif->g16.guard), crc);
- if (be16_to_cpu(dif->guard) != crc) {
+ if (be16_to_cpu(dif->g16.guard) != crc) {
return NVME_E2E_GUARD_ERROR;
}
}
if (prinfo & NVME_PRINFO_PRCHK_APP) {
- trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag,
+ trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->g16.apptag), apptag,
appmask);
- if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) {
+ if ((be16_to_cpu(dif->g16.apptag) & appmask) != (apptag & appmask)) {
return NVME_E2E_APP_ERROR;
}
}
if (prinfo & NVME_PRINFO_PRCHK_REF) {
- trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag);
+ trace_pci_nvme_dif_prchk_reftag_crc16(be32_to_cpu(dif->g16.reftag),
+ reftag);
- if (be32_to_cpu(dif->reftag) != reftag) {
+ if (be32_to_cpu(dif->g16.reftag) != reftag) {
return NVME_E2E_REF_ERROR;
}
}
@@ -129,12 +204,96 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
return NVME_SUCCESS;
}
+/*
+ * Check one 64-bit guard protection information tuple.
+ *
+ * @dif:     tuple to check
+ * @buf:     data of the logical block (ns->lbasz bytes)
+ * @mbuf:    start of the block's metadata
+ * @pil:     number of metadata bytes preceding the tuple; these are
+ *           covered by the guard as well
+ * @prinfo:  NVME_PRINFO_PRCHK_* bits selecting which checks run
+ * @apptag/@appmask: expected application tag and the mask applied to both
+ *           sides of the comparison
+ * @reftag:  expected reference tag
+ *
+ * Checking is skipped (NVME_SUCCESS) when the stored application tag is
+ * 0xffff and, for protection type 3, the stored reference tag is all ones
+ * as well. Otherwise returns NVME_E2E_GUARD_ERROR, NVME_E2E_APP_ERROR or
+ * NVME_E2E_REF_ERROR for the first failing check, or NVME_SUCCESS.
+ */
+static uint16_t nvme_dif_prchk_crc64(NvmeNamespace *ns, NvmeDifTuple *dif,
+                                     uint8_t *buf, uint8_t *mbuf, size_t pil,
+                                     uint8_t prinfo, uint16_t apptag,
+                                     uint16_t appmask, uint64_t reftag)
+{
+    uint64_t r = 0;
+
+    /* stored 48-bit reference tag, most significant byte first */
+    r |= (uint64_t)dif->g64.sr[0] << 40;
+    r |= (uint64_t)dif->g64.sr[1] << 32;
+    r |= (uint64_t)dif->g64.sr[2] << 24;
+    r |= (uint64_t)dif->g64.sr[3] << 16;
+    r |= (uint64_t)dif->g64.sr[4] << 8;
+    r |= (uint64_t)dif->g64.sr[5];
+
+    switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
+    case NVME_ID_NS_DPS_TYPE_3:
+        if (r != 0xffffffffffff) {
+            break;
+        }
+
+        /* fallthrough */
+    case NVME_ID_NS_DPS_TYPE_1:
+    case NVME_ID_NS_DPS_TYPE_2:
+        if (be16_to_cpu(dif->g64.apptag) != 0xffff) {
+            break;
+        }
+
+        /* log the tag from the 64-bit layout, not the 16-bit one */
+        trace_pci_nvme_dif_prchk_disabled_crc64(be16_to_cpu(dif->g64.apptag),
+                                                r);
+
+        return NVME_SUCCESS;
+    }
+
+    if (prinfo & NVME_PRINFO_PRCHK_GUARD) {
+        uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz);
+
+        if (pil) {
+            /*
+             * crc64_nvme() returns the inverted register and takes the
+             * raw register as seed, so undo the inversion before
+             * continuing the CRC over the leading metadata bytes.
+             */
+            crc = crc64_nvme(~crc, mbuf, pil);
+        }
+
+        trace_pci_nvme_dif_prchk_guard_crc64(be64_to_cpu(dif->g64.guard), crc);
+
+        if (be64_to_cpu(dif->g64.guard) != crc) {
+            return NVME_E2E_GUARD_ERROR;
+        }
+    }
+
+    if (prinfo & NVME_PRINFO_PRCHK_APP) {
+        trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->g64.apptag), apptag,
+                                        appmask);
+
+        if ((be16_to_cpu(dif->g64.apptag) & appmask) != (apptag & appmask)) {
+            return NVME_E2E_APP_ERROR;
+        }
+    }
+
+    if (prinfo & NVME_PRINFO_PRCHK_REF) {
+        trace_pci_nvme_dif_prchk_reftag_crc64(r, reftag);
+
+        if (r != reftag) {
+            return NVME_E2E_REF_ERROR;
+        }
+    }
+
+    return NVME_SUCCESS;
+}
+
+/*
+ * Check one protection information tuple, forwarding to the 16-bit or
+ * 64-bit guard checker according to ns->pif and returning its status.
+ * Any other ns->pif value aborts.
+ */
+static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif,
+                               uint8_t *buf, uint8_t *mbuf, size_t pil,
+                               uint8_t prinfo, uint16_t apptag,
+                               uint16_t appmask, uint64_t reftag)
+{
+    if (ns->pif == NVME_PI_GUARD_16) {
+        return nvme_dif_prchk_crc16(ns, dif, buf, mbuf, pil, prinfo, apptag,
+                                    appmask, reftag);
+    }
+
+    if (ns->pif == NVME_PI_GUARD_64) {
+        return nvme_dif_prchk_crc64(ns, dif, buf, mbuf, pil, prinfo, apptag,
+                                    appmask, reftag);
+    }
+
+    abort();
+}
+
uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
uint8_t *mbuf, size_t mlen, uint8_t prinfo,
uint64_t slba, uint16_t apptag,
- uint16_t appmask, uint32_t *reftag)
+ uint16_t appmask, uint64_t *reftag)
{
- uint8_t *end = buf + len;
+ uint8_t *bufp, *end = buf + len;
int16_t pil = 0;
uint16_t status;
@@ -144,18 +303,34 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
}
if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
- pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+ pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
}
trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil);
- for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) {
+ for (bufp = buf; bufp < end; bufp += ns->lbasz, mbuf += ns->lbaf.ms) {
NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
-
- status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, prinfo, apptag,
+ status = nvme_dif_prchk(ns, dif, bufp, mbuf, pil, prinfo, apptag,
appmask, *reftag);
if (status) {
- return status;
+            /*
+             * The first block of a 'raw' image is always allocated, so we
+             * cannot reliably know if the block is all zeroes or not. For
+             * CRC16 this works fine because the T10 CRC16 is 0x0 for all
+             * zeroes, but the Rocksoft CRC64 is not. Thus, if a guard error is
+             * detected for the first block, check if it is zeroed and manually
+             * set the protection information to all ones to disable protection
+             * information checking.
+             *
+             * Any other failure, including a guard error on a first block
+             * that does contain data, is reported to the caller.
+             */
+            bool unwritten = false;
+
+            if (status == NVME_E2E_GUARD_ERROR && slba == 0x0 && bufp == buf) {
+                g_autofree uint8_t *zeroes = g_malloc0(ns->lbasz);
+
+                unwritten = memcmp(bufp, zeroes, ns->lbasz) == 0;
+            }
+
+            if (!unwritten) {
+                return status;
+            }
+
+            memset(mbuf + pil, 0xff, nvme_pi_tuple_size(ns));
}
if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) {
@@ -183,7 +358,7 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) {
- pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+ pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
}
do {
@@ -209,7 +384,7 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
end = mbufp + mlen;
for (; mbufp < end; mbufp += ns->lbaf.ms) {
- memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple));
+ memset(mbufp + pil, 0xff, nvme_pi_tuple_size(ns));
}
}
@@ -251,9 +426,12 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret)
uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
uint16_t apptag = le16_to_cpu(rw->apptag);
uint16_t appmask = le16_to_cpu(rw->appmask);
- uint32_t reftag = le32_to_cpu(rw->reftag);
+ uint64_t reftag = le32_to_cpu(rw->reftag);
+ uint64_t cdw3 = le32_to_cpu(rw->cdw3);
uint16_t status;
+ reftag |= cdw3 << 32;
+
trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask,
reftag);
@@ -283,7 +461,7 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret)
goto out;
}
- if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) {
+ if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == nvme_pi_tuple_size(ns)) {
goto out;
}
@@ -367,11 +545,14 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control));
uint16_t apptag = le16_to_cpu(rw->apptag);
uint16_t appmask = le16_to_cpu(rw->appmask);
- uint32_t reftag = le32_to_cpu(rw->reftag);
+ uint64_t reftag = le32_to_cpu(rw->reftag);
+ uint64_t cdw3 = le32_to_cpu(rw->cdw3);
bool pract = !!(prinfo & NVME_PRINFO_PRACT);
NvmeBounceContext *ctx;
uint16_t status;
+ reftag |= cdw3 << 32;
+
trace_pci_nvme_dif_rw(pract, prinfo);
ctx = g_new0(NvmeBounceContext, 1);
@@ -387,7 +568,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
if (pract) {
uint8_t *mbuf, *end;
- int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple);
+ int16_t pil = ns->lbaf.ms - nvme_pi_tuple_size(ns);
status = nvme_check_prinfo(ns, prinfo, slba, reftag);
if (status) {
@@ -411,8 +592,29 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
for (; mbuf < end; mbuf += ns->lbaf.ms) {
NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil);
- dif->apptag = cpu_to_be16(apptag);
- dif->reftag = cpu_to_be32(reftag);
+ switch (ns->pif) {
+ case NVME_PI_GUARD_16:
+ dif->g16.apptag = cpu_to_be16(apptag);
+ dif->g16.reftag = cpu_to_be32(reftag);
+
+ break;
+
+ case NVME_PI_GUARD_64:
+ dif->g64.guard = cpu_to_be64(0x6482d367eb22b64e);
+ dif->g64.apptag = cpu_to_be16(apptag);
+
+ dif->g64.sr[0] = reftag >> 40;
+ dif->g64.sr[1] = reftag >> 32;
+ dif->g64.sr[2] = reftag >> 24;
+ dif->g64.sr[3] = reftag >> 16;
+ dif->g64.sr[4] = reftag >> 8;
+ dif->g64.sr[5] = reftag;
+
+ break;
+
+ default:
+ abort();
+ }
switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) {
case NVME_ID_NS_DPS_TYPE_1:
@@ -427,7 +629,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
return NVME_NO_COMPLETE;
}
- if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) {
+ if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
mapped_len += mlen;
}
@@ -461,7 +663,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req)
qemu_iovec_init(&ctx->mdata.iov, 1);
qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen);
- if (!(pract && ns->lbaf.ms == 8)) {
+ if (!(pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) {
status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size,
NVME_TX_DIRECTION_TO_DEVICE, req);
if (status) {
diff --git a/hw/nvme/dif.h b/hw/nvme/dif.h
new file mode 100644
index 0000000000..f12e312250
--- /dev/null
+++ b/hw/nvme/dif.h
@@ -0,0 +1,191 @@
+#ifndef HW_NVME_DIF_H
+#define HW_NVME_DIF_H
+
+/* from Linux kernel (crypto/crct10dif_common.c) */
+static const uint16_t crc16_t10dif_table[256] = {
+ 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+ 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+ 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+ 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+ 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+ 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+ 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+ 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+ 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+ 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+ 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+ 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+ 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+ 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+ 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+ 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+ 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+ 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+ 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+ 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+ 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+ 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+ 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+ 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+ 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+ 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+ 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+ 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+ 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+ 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+ 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+ 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
+
+#define CRC64_NVME_POLY 0x9A6C9329AC4BC9B5ULL
+
+static const uint64_t crc64_nvme_table[] = {
+ 0x0000000000000000ULL, 0x7F6EF0C830358979ULL,
+ 0xFEDDE190606B12F2ULL, 0x81B31158505E9B8BULL,
+ 0xC962E5739841B68FULL, 0xB60C15BBA8743FF6ULL,
+ 0x37BF04E3F82AA47DULL, 0x48D1F42BC81F2D04ULL,
+ 0xA61CECB46814FE75ULL, 0xD9721C7C5821770CULL,
+ 0x58C10D24087FEC87ULL, 0x27AFFDEC384A65FEULL,
+ 0x6F7E09C7F05548FAULL, 0x1010F90FC060C183ULL,
+ 0x91A3E857903E5A08ULL, 0xEECD189FA00BD371ULL,
+ 0x78E0FF3B88BE6F81ULL, 0x078E0FF3B88BE6F8ULL,
+ 0x863D1EABE8D57D73ULL, 0xF953EE63D8E0F40AULL,
+ 0xB1821A4810FFD90EULL, 0xCEECEA8020CA5077ULL,
+ 0x4F5FFBD87094CBFCULL, 0x30310B1040A14285ULL,
+ 0xDEFC138FE0AA91F4ULL, 0xA192E347D09F188DULL,
+ 0x2021F21F80C18306ULL, 0x5F4F02D7B0F40A7FULL,
+ 0x179EF6FC78EB277BULL, 0x68F0063448DEAE02ULL,
+ 0xE943176C18803589ULL, 0x962DE7A428B5BCF0ULL,
+ 0xF1C1FE77117CDF02ULL, 0x8EAF0EBF2149567BULL,
+ 0x0F1C1FE77117CDF0ULL, 0x7072EF2F41224489ULL,
+ 0x38A31B04893D698DULL, 0x47CDEBCCB908E0F4ULL,
+ 0xC67EFA94E9567B7FULL, 0xB9100A5CD963F206ULL,
+ 0x57DD12C379682177ULL, 0x28B3E20B495DA80EULL,
+ 0xA900F35319033385ULL, 0xD66E039B2936BAFCULL,
+ 0x9EBFF7B0E12997F8ULL, 0xE1D10778D11C1E81ULL,
+ 0x606216208142850AULL, 0x1F0CE6E8B1770C73ULL,
+ 0x8921014C99C2B083ULL, 0xF64FF184A9F739FAULL,
+ 0x77FCE0DCF9A9A271ULL, 0x08921014C99C2B08ULL,
+ 0x4043E43F0183060CULL, 0x3F2D14F731B68F75ULL,
+ 0xBE9E05AF61E814FEULL, 0xC1F0F56751DD9D87ULL,
+ 0x2F3DEDF8F1D64EF6ULL, 0x50531D30C1E3C78FULL,
+ 0xD1E00C6891BD5C04ULL, 0xAE8EFCA0A188D57DULL,
+ 0xE65F088B6997F879ULL, 0x9931F84359A27100ULL,
+ 0x1882E91B09FCEA8BULL, 0x67EC19D339C963F2ULL,
+ 0xD75ADABD7A6E2D6FULL, 0xA8342A754A5BA416ULL,
+ 0x29873B2D1A053F9DULL, 0x56E9CBE52A30B6E4ULL,
+ 0x1E383FCEE22F9BE0ULL, 0x6156CF06D21A1299ULL,
+ 0xE0E5DE5E82448912ULL, 0x9F8B2E96B271006BULL,
+ 0x71463609127AD31AULL, 0x0E28C6C1224F5A63ULL,
+ 0x8F9BD7997211C1E8ULL, 0xF0F5275142244891ULL,
+ 0xB824D37A8A3B6595ULL, 0xC74A23B2BA0EECECULL,
+ 0x46F932EAEA507767ULL, 0x3997C222DA65FE1EULL,
+ 0xAFBA2586F2D042EEULL, 0xD0D4D54EC2E5CB97ULL,
+ 0x5167C41692BB501CULL, 0x2E0934DEA28ED965ULL,
+ 0x66D8C0F56A91F461ULL, 0x19B6303D5AA47D18ULL,
+ 0x980521650AFAE693ULL, 0xE76BD1AD3ACF6FEAULL,
+ 0x09A6C9329AC4BC9BULL, 0x76C839FAAAF135E2ULL,
+ 0xF77B28A2FAAFAE69ULL, 0x8815D86ACA9A2710ULL,
+ 0xC0C42C4102850A14ULL, 0xBFAADC8932B0836DULL,
+ 0x3E19CDD162EE18E6ULL, 0x41773D1952DB919FULL,
+ 0x269B24CA6B12F26DULL, 0x59F5D4025B277B14ULL,
+ 0xD846C55A0B79E09FULL, 0xA72835923B4C69E6ULL,
+ 0xEFF9C1B9F35344E2ULL, 0x90973171C366CD9BULL,
+ 0x1124202993385610ULL, 0x6E4AD0E1A30DDF69ULL,
+ 0x8087C87E03060C18ULL, 0xFFE938B633338561ULL,
+ 0x7E5A29EE636D1EEAULL, 0x0134D92653589793ULL,
+ 0x49E52D0D9B47BA97ULL, 0x368BDDC5AB7233EEULL,
+ 0xB738CC9DFB2CA865ULL, 0xC8563C55CB19211CULL,
+ 0x5E7BDBF1E3AC9DECULL, 0x21152B39D3991495ULL,
+ 0xA0A63A6183C78F1EULL, 0xDFC8CAA9B3F20667ULL,
+ 0x97193E827BED2B63ULL, 0xE877CE4A4BD8A21AULL,
+ 0x69C4DF121B863991ULL, 0x16AA2FDA2BB3B0E8ULL,
+ 0xF86737458BB86399ULL, 0x8709C78DBB8DEAE0ULL,
+ 0x06BAD6D5EBD3716BULL, 0x79D4261DDBE6F812ULL,
+ 0x3105D23613F9D516ULL, 0x4E6B22FE23CC5C6FULL,
+ 0xCFD833A67392C7E4ULL, 0xB0B6C36E43A74E9DULL,
+ 0x9A6C9329AC4BC9B5ULL, 0xE50263E19C7E40CCULL,
+ 0x64B172B9CC20DB47ULL, 0x1BDF8271FC15523EULL,
+ 0x530E765A340A7F3AULL, 0x2C608692043FF643ULL,
+ 0xADD397CA54616DC8ULL, 0xD2BD67026454E4B1ULL,
+ 0x3C707F9DC45F37C0ULL, 0x431E8F55F46ABEB9ULL,
+ 0xC2AD9E0DA4342532ULL, 0xBDC36EC59401AC4BULL,
+ 0xF5129AEE5C1E814FULL, 0x8A7C6A266C2B0836ULL,
+ 0x0BCF7B7E3C7593BDULL, 0x74A18BB60C401AC4ULL,
+ 0xE28C6C1224F5A634ULL, 0x9DE29CDA14C02F4DULL,
+ 0x1C518D82449EB4C6ULL, 0x633F7D4A74AB3DBFULL,
+ 0x2BEE8961BCB410BBULL, 0x548079A98C8199C2ULL,
+ 0xD53368F1DCDF0249ULL, 0xAA5D9839ECEA8B30ULL,
+ 0x449080A64CE15841ULL, 0x3BFE706E7CD4D138ULL,
+ 0xBA4D61362C8A4AB3ULL, 0xC52391FE1CBFC3CAULL,
+ 0x8DF265D5D4A0EECEULL, 0xF29C951DE49567B7ULL,
+ 0x732F8445B4CBFC3CULL, 0x0C41748D84FE7545ULL,
+ 0x6BAD6D5EBD3716B7ULL, 0x14C39D968D029FCEULL,
+ 0x95708CCEDD5C0445ULL, 0xEA1E7C06ED698D3CULL,
+ 0xA2CF882D2576A038ULL, 0xDDA178E515432941ULL,
+ 0x5C1269BD451DB2CAULL, 0x237C997575283BB3ULL,
+ 0xCDB181EAD523E8C2ULL, 0xB2DF7122E51661BBULL,
+ 0x336C607AB548FA30ULL, 0x4C0290B2857D7349ULL,
+ 0x04D364994D625E4DULL, 0x7BBD94517D57D734ULL,
+ 0xFA0E85092D094CBFULL, 0x856075C11D3CC5C6ULL,
+ 0x134D926535897936ULL, 0x6C2362AD05BCF04FULL,
+ 0xED9073F555E26BC4ULL, 0x92FE833D65D7E2BDULL,
+ 0xDA2F7716ADC8CFB9ULL, 0xA54187DE9DFD46C0ULL,
+ 0x24F29686CDA3DD4BULL, 0x5B9C664EFD965432ULL,
+ 0xB5517ED15D9D8743ULL, 0xCA3F8E196DA80E3AULL,
+ 0x4B8C9F413DF695B1ULL, 0x34E26F890DC31CC8ULL,
+ 0x7C339BA2C5DC31CCULL, 0x035D6B6AF5E9B8B5ULL,
+ 0x82EE7A32A5B7233EULL, 0xFD808AFA9582AA47ULL,
+ 0x4D364994D625E4DAULL, 0x3258B95CE6106DA3ULL,
+ 0xB3EBA804B64EF628ULL, 0xCC8558CC867B7F51ULL,
+ 0x8454ACE74E645255ULL, 0xFB3A5C2F7E51DB2CULL,
+ 0x7A894D772E0F40A7ULL, 0x05E7BDBF1E3AC9DEULL,
+ 0xEB2AA520BE311AAFULL, 0x944455E88E0493D6ULL,
+ 0x15F744B0DE5A085DULL, 0x6A99B478EE6F8124ULL,
+ 0x224840532670AC20ULL, 0x5D26B09B16452559ULL,
+ 0xDC95A1C3461BBED2ULL, 0xA3FB510B762E37ABULL,
+ 0x35D6B6AF5E9B8B5BULL, 0x4AB846676EAE0222ULL,
+ 0xCB0B573F3EF099A9ULL, 0xB465A7F70EC510D0ULL,
+ 0xFCB453DCC6DA3DD4ULL, 0x83DAA314F6EFB4ADULL,
+ 0x0269B24CA6B12F26ULL, 0x7D0742849684A65FULL,
+ 0x93CA5A1B368F752EULL, 0xECA4AAD306BAFC57ULL,
+ 0x6D17BB8B56E467DCULL, 0x12794B4366D1EEA5ULL,
+ 0x5AA8BF68AECEC3A1ULL, 0x25C64FA09EFB4AD8ULL,
+ 0xA4755EF8CEA5D153ULL, 0xDB1BAE30FE90582AULL,
+ 0xBCF7B7E3C7593BD8ULL, 0xC399472BF76CB2A1ULL,
+ 0x422A5673A732292AULL, 0x3D44A6BB9707A053ULL,
+ 0x759552905F188D57ULL, 0x0AFBA2586F2D042EULL,
+ 0x8B48B3003F739FA5ULL, 0xF42643C80F4616DCULL,
+ 0x1AEB5B57AF4DC5ADULL, 0x6585AB9F9F784CD4ULL,
+ 0xE436BAC7CF26D75FULL, 0x9B584A0FFF135E26ULL,
+ 0xD389BE24370C7322ULL, 0xACE74EEC0739FA5BULL,
+ 0x2D545FB4576761D0ULL, 0x523AAF7C6752E8A9ULL,
+ 0xC41748D84FE75459ULL, 0xBB79B8107FD2DD20ULL,
+ 0x3ACAA9482F8C46ABULL, 0x45A459801FB9CFD2ULL,
+ 0x0D75ADABD7A6E2D6ULL, 0x721B5D63E7936BAFULL,
+ 0xF3A84C3BB7CDF024ULL, 0x8CC6BCF387F8795DULL,
+ 0x620BA46C27F3AA2CULL, 0x1D6554A417C62355ULL,
+ 0x9CD645FC4798B8DEULL, 0xE3B8B53477AD31A7ULL,
+ 0xAB69411FBFB21CA3ULL, 0xD407B1D78F8795DAULL,
+ 0x55B4A08FDFD90E51ULL, 0x2ADA5047EFEC8728ULL,
+};
+
+static inline size_t nvme_pi_tuple_size(NvmeNamespace *ns)
+{
+ return ns->pif ? 16 : 8; /* PI tuple bytes: 16 when pif is nonzero, else 8 */
+}
+
+uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
+ uint64_t reftag);
+uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
+ uint64_t slba);
+void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
+ uint8_t *mbuf, size_t mlen, uint16_t apptag,
+ uint64_t *reftag);
+uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
+ uint8_t *mbuf, size_t mlen, uint8_t prinfo,
+ uint64_t slba, uint16_t apptag,
+ uint16_t appmask, uint64_t *reftag);
+uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req);
+
+#endif /* HW_NVME_DIF_H */
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index ee673f1a5b..8a3613d9ab 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -58,6 +58,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
{
static uint64_t ns_count;
NvmeIdNs *id_ns = &ns->id_ns;
+ NvmeIdNsNvm *id_ns_nvm = &ns->id_ns_nvm;
uint8_t ds;
uint16_t ms;
int i;
@@ -101,6 +102,8 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
id_ns->dps |= NVME_ID_NS_DPS_FIRST_EIGHT;
}
+ ns->pif = ns->params.pif;
+
static const NvmeLBAF lbaf[16] = {
[0] = { .ds = 9 },
[1] = { .ds = 9, .ms = 8 },
@@ -112,10 +115,11 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
[7] = { .ds = 12, .ms = 64 },
};
+ ns->nlbaf = 8;
+
memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf));
- id_ns->nlbaf = 7;
- for (i = 0; i <= id_ns->nlbaf; i++) {
+ for (i = 0; i < ns->nlbaf; i++) {
NvmeLBAF *lbaf = &id_ns->lbaf[i];
if (lbaf->ds == ds) {
if (lbaf->ms == ms) {
@@ -126,12 +130,16 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
}
/* add non-standard lba format */
- id_ns->nlbaf++;
- id_ns->lbaf[id_ns->nlbaf].ds = ds;
- id_ns->lbaf[id_ns->nlbaf].ms = ms;
- id_ns->flbas |= id_ns->nlbaf;
+ id_ns->lbaf[ns->nlbaf].ds = ds;
+ id_ns->lbaf[ns->nlbaf].ms = ms;
+ ns->nlbaf++;
+
+ id_ns->flbas |= i;
+
lbaf_found:
+ id_ns_nvm->elbaf[i] = (ns->pif & 0x3) << 7;
+ id_ns->nlbaf = ns->nlbaf - 1;
nvme_ns_init_format(ns);
return 0;
@@ -370,15 +378,36 @@ static void nvme_zoned_ns_shutdown(NvmeNamespace *ns)
static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
{
+ unsigned int pi_size;
+
if (!ns->blkconf.blk) {
error_setg(errp, "block backend not configured");
return -1;
}
- if (ns->params.pi && ns->params.ms < 8) {
- error_setg(errp, "at least 8 bytes of metadata required to enable "
- "protection information");
- return -1;
+ if (ns->params.pi) {
+ if (ns->params.pi > NVME_ID_NS_DPS_TYPE_3) {
+ error_setg(errp, "invalid 'pi' value");
+ return -1;
+ }
+
+ switch (ns->params.pif) {
+ case NVME_PI_GUARD_16:
+ pi_size = 8;
+ break;
+ case NVME_PI_GUARD_64:
+ pi_size = 16;
+ break;
+ default:
+ error_setg(errp, "invalid 'pif'");
+ return -1;
+ }
+
+ if (ns->params.ms < pi_size) {
+ error_setg(errp, "at least %u bytes of metadata required to "
+ "enable protection information", pi_size);
+ return -1;
+ }
}
if (ns->params.nsid > NVME_MAX_NAMESPACES) {
@@ -590,6 +619,7 @@ static Property nvme_ns_props[] = {
DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0),
DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0),
DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0),
+ DEFINE_PROP_UINT8("pif", NvmeNamespace, params.pif, 0),
DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128),
DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128),
DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127),
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 90c0bb7ce2..739c8b8f79 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -102,6 +102,7 @@ typedef struct NvmeNamespaceParams {
uint8_t mset;
uint8_t pi;
uint8_t pil;
+ uint8_t pif;
uint16_t mssrl;
uint32_t mcl;
@@ -127,12 +128,15 @@ typedef struct NvmeNamespace {
int64_t size;
int64_t moff;
NvmeIdNs id_ns;
+ NvmeIdNsNvm id_ns_nvm;
NvmeLBAF lbaf;
+ unsigned int nlbaf;
size_t lbasz;
const uint32_t *iocs;
uint8_t csi;
uint16_t status;
int attached;
+ uint8_t pif;
struct {
uint16_t zrwas;
@@ -468,7 +472,9 @@ typedef struct NvmeCtrl {
uint16_t temp_thresh_hi;
uint16_t temp_thresh_low;
};
- uint32_t async_config;
+
+ uint32_t async_config;
+ NvmeHostBehaviorSupport hbs;
} features;
} NvmeCtrl;
@@ -513,54 +519,4 @@ void nvme_rw_complete_cb(void *opaque, int ret);
uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
NvmeCmd *cmd);
-/* from Linux kernel (crypto/crct10dif_common.c) */
-static const uint16_t t10_dif_crc_table[256] = {
- 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
- 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
- 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
- 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
- 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
- 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
- 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
- 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
- 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
- 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
- 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
- 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
- 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
- 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
- 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
- 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
- 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
- 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
- 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
- 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
- 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
- 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
- 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
- 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
- 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
- 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
- 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
- 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
- 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
- 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
- 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
- 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
-};
-
-uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba,
- uint32_t reftag);
-uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen,
- uint64_t slba);
-void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len,
- uint8_t *mbuf, size_t mlen, uint16_t apptag,
- uint32_t *reftag);
-uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len,
- uint8_t *mbuf, size_t mlen, uint8_t prinfo,
- uint64_t slba, uint16_t apptag,
- uint16_t appmask, uint32_t *reftag);
-uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req);
-
-
#endif /* HW_NVME_INTERNAL_H */
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index 90730d802f..ff1b458969 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -20,12 +20,16 @@ pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32""
-pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32""
+pci_nvme_dif_pract_generate_dif_crc16(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32""
+pci_nvme_dif_pract_generate_dif_crc64(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint64_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx64""
pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16""
-pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32""
-pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16""
+pci_nvme_dif_prchk_disabled_crc16(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32""
+pci_nvme_dif_prchk_disabled_crc64(uint16_t apptag, uint64_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx64""
+pci_nvme_dif_prchk_guard_crc16(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16""
+pci_nvme_dif_prchk_guard_crc64(uint64_t guard, uint64_t crc) "guard 0x%"PRIx64" crc 0x%"PRIx64""
pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16""
-pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32""
+pci_nvme_dif_prchk_reftag_crc16(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32""
+pci_nvme_dif_prchk_reftag_crc64(uint64_t reftag, uint64_t elbrt) "reftag 0x%"PRIx64" elbrt 0x%"PRIx64""
pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8""
pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_copy_out(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32""
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index fbfdf47e26..18b43be7f6 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -219,7 +219,7 @@ static void postload_update_cb(void *opaque, bool running, RunState state)
{
SpaprNvram *nvram = opaque;
- /* This is called after bdrv_invalidate_cache_all. */
+ /* This is called after bdrv_activate_all. */
qemu_del_vm_change_state_handler(nvram->vmstate);
nvram->vmstate = NULL;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 4cc204f90d..953fc65fa8 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -27,6 +27,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/datadir.h"
+#include "qemu/memalign.h"
#include "qapi/error.h"
#include "qapi/qapi-events-machine.h"
#include "qapi/qapi-events-qdev.h"
diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c
index 4ee03c83e4..5170a33369 100644
--- a/hw/ppc/spapr_softmmu.c
+++ b/hw/ppc/spapr_softmmu.c
@@ -1,5 +1,6 @@
#include "qemu/osdep.h"
#include "qemu/cutils.h"
+#include "qemu/memalign.h"
#include "cpu.h"
#include "helper_regs.h"
#include "hw/ppc/spapr.h"
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index d2d869aaad..91bb9d21c4 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -42,6 +42,8 @@ config RISCV_VIRT
select PFLASH_CFI01
select SERIAL
select RISCV_ACLINT
+ select RISCV_APLIC
+ select RISCV_IMSIC
select SIFIVE_PLIC
select SIFIVE_TEST
select VIRTIO_MMIO
diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c
index aec7cfa33f..833624d66c 100644
--- a/hw/riscv/opentitan.c
+++ b/hw/riscv/opentitan.c
@@ -34,13 +34,15 @@ static const MemMapEntry ibex_memmap[] = {
[IBEX_DEV_FLASH] = { 0x20000000, 0x80000 },
[IBEX_DEV_UART] = { 0x40000000, 0x1000 },
[IBEX_DEV_GPIO] = { 0x40040000, 0x1000 },
- [IBEX_DEV_SPI] = { 0x40050000, 0x1000 },
+ [IBEX_DEV_SPI_DEVICE] = { 0x40050000, 0x1000 },
[IBEX_DEV_I2C] = { 0x40080000, 0x1000 },
[IBEX_DEV_PATTGEN] = { 0x400e0000, 0x1000 },
[IBEX_DEV_TIMER] = { 0x40100000, 0x1000 },
[IBEX_DEV_SENSOR_CTRL] = { 0x40110000, 0x1000 },
[IBEX_DEV_OTP_CTRL] = { 0x40130000, 0x4000 },
[IBEX_DEV_USBDEV] = { 0x40150000, 0x1000 },
+ [IBEX_DEV_SPI_HOST0] = { 0x40300000, 0x1000 },
+ [IBEX_DEV_SPI_HOST1] = { 0x40310000, 0x1000 },
[IBEX_DEV_PWRMGR] = { 0x40400000, 0x1000 },
[IBEX_DEV_RSTMGR] = { 0x40410000, 0x1000 },
[IBEX_DEV_CLKMGR] = { 0x40420000, 0x1000 },
@@ -209,8 +211,12 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp)
create_unimplemented_device("riscv.lowrisc.ibex.gpio",
memmap[IBEX_DEV_GPIO].base, memmap[IBEX_DEV_GPIO].size);
- create_unimplemented_device("riscv.lowrisc.ibex.spi",
- memmap[IBEX_DEV_SPI].base, memmap[IBEX_DEV_SPI].size);
+ create_unimplemented_device("riscv.lowrisc.ibex.spi_device",
+ memmap[IBEX_DEV_SPI_DEVICE].base, memmap[IBEX_DEV_SPI_DEVICE].size);
+ create_unimplemented_device("riscv.lowrisc.ibex.spi_host0",
+ memmap[IBEX_DEV_SPI_HOST0].base, memmap[IBEX_DEV_SPI_HOST0].size);
+ create_unimplemented_device("riscv.lowrisc.ibex.spi_host1",
+ memmap[IBEX_DEV_SPI_HOST1].base, memmap[IBEX_DEV_SPI_HOST1].size);
create_unimplemented_device("riscv.lowrisc.ibex.i2c",
memmap[IBEX_DEV_I2C].base, memmap[IBEX_DEV_I2C].size);
create_unimplemented_device("riscv.lowrisc.ibex.pattgen",
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index e3068d6126..da50cbed43 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -33,6 +33,8 @@
#include "hw/riscv/boot.h"
#include "hw/riscv/numa.h"
#include "hw/intc/riscv_aclint.h"
+#include "hw/intc/riscv_aplic.h"
+#include "hw/intc/riscv_imsic.h"
#include "hw/intc/sifive_plic.h"
#include "hw/misc/sifive_test.h"
#include "chardev/char.h"
@@ -43,6 +45,28 @@
#include "hw/pci-host/gpex.h"
#include "hw/display/ramfb.h"
+/*
+ * The virt machine physical address space used by some of the devices,
+ * namely ACLINT, PLIC, APLIC, and IMSIC, depends on the number of sockets,
+ * the number of CPUs, and the number of IMSIC guest files.
+ *
+ * Various limits defined by VIRT_SOCKETS_MAX_BITS, VIRT_CPUS_MAX_BITS,
+ * and VIRT_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization
+ * of virt machine physical address space.
+ */
+
+#define VIRT_IMSIC_GROUP_MAX_SIZE (1U << IMSIC_MMIO_GROUP_MIN_SHIFT)
+#if VIRT_IMSIC_GROUP_MAX_SIZE < \
+ IMSIC_GROUP_SIZE(VIRT_CPUS_MAX_BITS, VIRT_IRQCHIP_MAX_GUESTS_BITS)
+#error "Can't accomodate single IMSIC group in address space"
+#endif
+
+#define VIRT_IMSIC_MAX_SIZE (VIRT_SOCKETS_MAX * \
+ VIRT_IMSIC_GROUP_MAX_SIZE)
+#if 0x4000000 < VIRT_IMSIC_MAX_SIZE
+#error "Can't accomodate all IMSIC groups in address space"
+#endif
+
static const MemMapEntry virt_memmap[] = {
[VIRT_DEBUG] = { 0x0, 0x100 },
[VIRT_MROM] = { 0x1000, 0xf000 },
@@ -52,10 +76,14 @@ static const MemMapEntry virt_memmap[] = {
[VIRT_ACLINT_SSWI] = { 0x2F00000, 0x4000 },
[VIRT_PCIE_PIO] = { 0x3000000, 0x10000 },
[VIRT_PLIC] = { 0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
+ [VIRT_APLIC_M] = { 0xc000000, APLIC_SIZE(VIRT_CPUS_MAX) },
+ [VIRT_APLIC_S] = { 0xd000000, APLIC_SIZE(VIRT_CPUS_MAX) },
[VIRT_UART0] = { 0x10000000, 0x100 },
[VIRT_VIRTIO] = { 0x10001000, 0x1000 },
[VIRT_FW_CFG] = { 0x10100000, 0x18 },
[VIRT_FLASH] = { 0x20000000, 0x4000000 },
+ [VIRT_IMSIC_M] = { 0x24000000, VIRT_IMSIC_MAX_SIZE },
+ [VIRT_IMSIC_S] = { 0x28000000, VIRT_IMSIC_MAX_SIZE },
[VIRT_PCIE_ECAM] = { 0x30000000, 0x10000000 },
[VIRT_PCIE_MMIO] = { 0x40000000, 0x40000000 },
[VIRT_DRAM] = { 0x80000000, 0x0 },
@@ -133,12 +161,13 @@ static void virt_flash_map(RISCVVirtState *s,
sysmem);
}
-static void create_pcie_irq_map(void *fdt, char *nodename,
- uint32_t plic_phandle)
+static void create_pcie_irq_map(RISCVVirtState *s, void *fdt, char *nodename,
+ uint32_t irqchip_phandle)
{
int pin, dev;
- uint32_t
- full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS * FDT_INT_MAP_WIDTH] = {};
+ uint32_t irq_map_stride = 0;
+ uint32_t full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS *
+ FDT_MAX_INT_MAP_WIDTH] = {};
uint32_t *irq_map = full_irq_map;
/* This code creates a standard swizzle of interrupts such that
@@ -156,23 +185,31 @@ static void create_pcie_irq_map(void *fdt, char *nodename,
int irq_nr = PCIE_IRQ + ((pin + PCI_SLOT(devfn)) % GPEX_NUM_IRQS);
int i = 0;
+ /* Fill PCI address cells */
irq_map[i] = cpu_to_be32(devfn << 8);
-
i += FDT_PCI_ADDR_CELLS;
- irq_map[i] = cpu_to_be32(pin + 1);
+ /* Fill PCI Interrupt cells */
+ irq_map[i] = cpu_to_be32(pin + 1);
i += FDT_PCI_INT_CELLS;
- irq_map[i++] = cpu_to_be32(plic_phandle);
- i += FDT_PLIC_ADDR_CELLS;
- irq_map[i] = cpu_to_be32(irq_nr);
+ /* Fill interrupt controller phandle and cells */
+ irq_map[i++] = cpu_to_be32(irqchip_phandle);
+ irq_map[i++] = cpu_to_be32(irq_nr);
+ if (s->aia_type != VIRT_AIA_TYPE_NONE) {
+ irq_map[i++] = cpu_to_be32(0x4);
+ }
- irq_map += FDT_INT_MAP_WIDTH;
+ if (!irq_map_stride) {
+ irq_map_stride = i;
+ }
+ irq_map += irq_map_stride;
}
}
- qemu_fdt_setprop(fdt, nodename, "interrupt-map",
- full_irq_map, sizeof(full_irq_map));
+ qemu_fdt_setprop(fdt, nodename, "interrupt-map", full_irq_map,
+ GPEX_NUM_IRQS * GPEX_NUM_IRQS *
+ irq_map_stride * sizeof(uint32_t));
qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask",
0x1800, 0, 0, 0x7);
@@ -298,7 +335,7 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
{
int cpu;
char *name;
- unsigned long addr;
+ unsigned long addr, size;
uint32_t aclint_cells_size;
uint32_t *aclint_mswi_cells;
uint32_t *aclint_sswi_cells;
@@ -319,29 +356,38 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
}
aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2;
- addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket);
- name = g_strdup_printf("/soc/mswi@%lx", addr);
- qemu_fdt_add_subnode(mc->fdt, name);
- qemu_fdt_setprop_string(mc->fdt, name, "compatible", "riscv,aclint-mswi");
- qemu_fdt_setprop_cells(mc->fdt, name, "reg",
- 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE);
- qemu_fdt_setprop(mc->fdt, name, "interrupts-extended",
- aclint_mswi_cells, aclint_cells_size);
- qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0);
- qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0);
- riscv_socket_fdt_write_id(mc, mc->fdt, name, socket);
- g_free(name);
+ if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) {
+ addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket);
+ name = g_strdup_printf("/soc/mswi@%lx", addr);
+ qemu_fdt_add_subnode(mc->fdt, name);
+ qemu_fdt_setprop_string(mc->fdt, name, "compatible",
+ "riscv,aclint-mswi");
+ qemu_fdt_setprop_cells(mc->fdt, name, "reg",
+ 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE);
+ qemu_fdt_setprop(mc->fdt, name, "interrupts-extended",
+ aclint_mswi_cells, aclint_cells_size);
+ qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0);
+ qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0);
+ riscv_socket_fdt_write_id(mc, mc->fdt, name, socket);
+ g_free(name);
+ }
- addr = memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE +
- (memmap[VIRT_CLINT].size * socket);
+ if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
+ addr = memmap[VIRT_CLINT].base +
+ (RISCV_ACLINT_DEFAULT_MTIMER_SIZE * socket);
+ size = RISCV_ACLINT_DEFAULT_MTIMER_SIZE;
+ } else {
+ addr = memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE +
+ (memmap[VIRT_CLINT].size * socket);
+ size = memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE;
+ }
name = g_strdup_printf("/soc/mtimer@%lx", addr);
qemu_fdt_add_subnode(mc->fdt, name);
qemu_fdt_setprop_string(mc->fdt, name, "compatible",
"riscv,aclint-mtimer");
qemu_fdt_setprop_cells(mc->fdt, name, "reg",
0x0, addr + RISCV_ACLINT_DEFAULT_MTIME,
- 0x0, memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE -
- RISCV_ACLINT_DEFAULT_MTIME,
+ 0x0, size - RISCV_ACLINT_DEFAULT_MTIME,
0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP,
0x0, RISCV_ACLINT_DEFAULT_MTIME);
qemu_fdt_setprop(mc->fdt, name, "interrupts-extended",
@@ -349,19 +395,22 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
riscv_socket_fdt_write_id(mc, mc->fdt, name, socket);
g_free(name);
- addr = memmap[VIRT_ACLINT_SSWI].base +
- (memmap[VIRT_ACLINT_SSWI].size * socket);
- name = g_strdup_printf("/soc/sswi@%lx", addr);
- qemu_fdt_add_subnode(mc->fdt, name);
- qemu_fdt_setprop_string(mc->fdt, name, "compatible", "riscv,aclint-sswi");
- qemu_fdt_setprop_cells(mc->fdt, name, "reg",
- 0x0, addr, 0x0, memmap[VIRT_ACLINT_SSWI].size);
- qemu_fdt_setprop(mc->fdt, name, "interrupts-extended",
- aclint_sswi_cells, aclint_cells_size);
- qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0);
- qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0);
- riscv_socket_fdt_write_id(mc, mc->fdt, name, socket);
- g_free(name);
+ if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) {
+ addr = memmap[VIRT_ACLINT_SSWI].base +
+ (memmap[VIRT_ACLINT_SSWI].size * socket);
+ name = g_strdup_printf("/soc/sswi@%lx", addr);
+ qemu_fdt_add_subnode(mc->fdt, name);
+ qemu_fdt_setprop_string(mc->fdt, name, "compatible",
+ "riscv,aclint-sswi");
+ qemu_fdt_setprop_cells(mc->fdt, name, "reg",
+ 0x0, addr, 0x0, memmap[VIRT_ACLINT_SSWI].size);
+ qemu_fdt_setprop(mc->fdt, name, "interrupts-extended",
+ aclint_sswi_cells, aclint_cells_size);
+ qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0);
+ qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0);
+ riscv_socket_fdt_write_id(mc, mc->fdt, name, socket);
+ g_free(name);
+ }
g_free(aclint_mswi_cells);
g_free(aclint_mtimer_cells);
@@ -405,8 +454,6 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr);
qemu_fdt_add_subnode(mc->fdt, plic_name);
qemu_fdt_setprop_cell(mc->fdt, plic_name,
- "#address-cells", FDT_PLIC_ADDR_CELLS);
- qemu_fdt_setprop_cell(mc->fdt, plic_name,
"#interrupt-cells", FDT_PLIC_INT_CELLS);
qemu_fdt_setprop_string_array(mc->fdt, plic_name, "compatible",
(char **)&plic_compat,
@@ -425,17 +472,233 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
g_free(plic_cells);
}
+/*
+ * Return the smallest n for which BIT(n) is not below @count.
+ * A @count of 0 or 1 yields 0.
+ */
+static uint32_t imsic_num_bits(uint32_t count)
+{
+    uint32_t bits;
+
+    for (bits = 0; BIT(bits) < count; bits++) {
+        /* keep widening until BIT(bits) reaches count */
+    }
+
+    return bits;
+}
+
+static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
+ uint32_t *phandle, uint32_t *intc_phandles,
+ uint32_t *msi_m_phandle, uint32_t *msi_s_phandle)
+{ /*
+   * Add the M-level and the S-level IMSIC node to mc->fdt.
+   *
+   * Two phandles are taken from *phandle (which is advanced by two) and
+   * handed back through @msi_m_phandle and @msi_s_phandle.
+   * @intc_phandles is read for every CPU of the machine, i.e. it must hold
+   * mc->smp.cpus entries.
+   */
+ int cpu, socket;
+ char *imsic_name;
+ MachineState *mc = MACHINE(s);
+ uint32_t imsic_max_hart_per_socket, imsic_guest_bits;
+ uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size;
+
+ *msi_m_phandle = (*phandle)++;
+ *msi_s_phandle = (*phandle)++;
+ imsic_cells = g_new0(uint32_t, mc->smp.cpus * 2); /* 2 cells per CPU */
+ imsic_regs = g_new0(uint32_t, riscv_socket_count(mc) * 4); /* 4 per socket */
+
+ /*
+  * M-level IMSIC node
+  *
+  * "interrupts-extended" pairs each CPU's interrupt controller phandle
+  * with IRQ_M_EXT; cells are stored big-endian.
+  */
+ for (cpu = 0; cpu < mc->smp.cpus; cpu++) {
+ imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
+ imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT);
+ }
+ imsic_max_hart_per_socket = 0;
+ for (socket = 0; socket < riscv_socket_count(mc); socket++) { /* one "reg" entry per socket */
+ imsic_addr = memmap[VIRT_IMSIC_M].base +
+ socket * VIRT_IMSIC_GROUP_MAX_SIZE;
+ imsic_size = IMSIC_HART_SIZE(0) * s->soc[socket].num_harts;
+ imsic_regs[socket * 4 + 0] = 0;
+ imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
+ imsic_regs[socket * 4 + 2] = 0;
+ imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
+ if (imsic_max_hart_per_socket < s->soc[socket].num_harts) { /* track the largest socket */
+ imsic_max_hart_per_socket = s->soc[socket].num_harts;
+ }
+ }
+ imsic_name = g_strdup_printf("/soc/imsics@%lx",
+ (unsigned long)memmap[VIRT_IMSIC_M].base);
+ qemu_fdt_add_subnode(mc->fdt, imsic_name);
+ qemu_fdt_setprop_string(mc->fdt, imsic_name, "compatible",
+ "riscv,imsics");
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "#interrupt-cells",
+ FDT_IMSIC_INT_CELLS);
+ qemu_fdt_setprop(mc->fdt, imsic_name, "interrupt-controller",
+ NULL, 0);
+ qemu_fdt_setprop(mc->fdt, imsic_name, "msi-controller",
+ NULL, 0);
+ qemu_fdt_setprop(mc->fdt, imsic_name, "interrupts-extended",
+ imsic_cells, mc->smp.cpus * sizeof(uint32_t) * 2);
+ qemu_fdt_setprop(mc->fdt, imsic_name, "reg", imsic_regs,
+ riscv_socket_count(mc) * sizeof(uint32_t) * 4);
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,num-ids",
+ VIRT_IRQCHIP_NUM_MSIS);
+ qemu_fdt_setprop_cells(mc->fdt, imsic_name, "riscv,ipi-id",
+ VIRT_IRQCHIP_IPI_MSI);
+ if (riscv_socket_count(mc) > 1) { /* index layout is only described for multi-socket machines */
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,hart-index-bits",
+ imsic_num_bits(imsic_max_hart_per_socket));
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,group-index-bits",
+ imsic_num_bits(riscv_socket_count(mc)));
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,group-index-shift",
+ IMSIC_MMIO_GROUP_MIN_SHIFT);
+ }
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "phandle", *msi_m_phandle);
+ g_free(imsic_name);
+
+ /*
+  * S-level IMSIC node
+  *
+  * imsic_cells and imsic_regs are reused and overwritten, this time with
+  * IRQ_S_EXT and the VIRT_IMSIC_S addresses.
+  */
+ for (cpu = 0; cpu < mc->smp.cpus; cpu++) {
+ imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
+ imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT);
+ }
+ imsic_guest_bits = imsic_num_bits(s->aia_guests + 1); /* sized for aia_guests + 1 entries */
+ imsic_max_hart_per_socket = 0;
+ for (socket = 0; socket < riscv_socket_count(mc); socket++) {
+ imsic_addr = memmap[VIRT_IMSIC_S].base +
+ socket * VIRT_IMSIC_GROUP_MAX_SIZE;
+ imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) *
+ s->soc[socket].num_harts;
+ imsic_regs[socket * 4 + 0] = 0;
+ imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr);
+ imsic_regs[socket * 4 + 2] = 0;
+ imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size);
+ if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
+ imsic_max_hart_per_socket = s->soc[socket].num_harts;
+ }
+ }
+ imsic_name = g_strdup_printf("/soc/imsics@%lx",
+ (unsigned long)memmap[VIRT_IMSIC_S].base);
+ qemu_fdt_add_subnode(mc->fdt, imsic_name);
+ qemu_fdt_setprop_string(mc->fdt, imsic_name, "compatible",
+ "riscv,imsics");
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "#interrupt-cells",
+ FDT_IMSIC_INT_CELLS);
+ qemu_fdt_setprop(mc->fdt, imsic_name, "interrupt-controller",
+ NULL, 0);
+ qemu_fdt_setprop(mc->fdt, imsic_name, "msi-controller",
+ NULL, 0);
+ qemu_fdt_setprop(mc->fdt, imsic_name, "interrupts-extended",
+ imsic_cells, mc->smp.cpus * sizeof(uint32_t) * 2);
+ qemu_fdt_setprop(mc->fdt, imsic_name, "reg", imsic_regs,
+ riscv_socket_count(mc) * sizeof(uint32_t) * 4);
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,num-ids",
+ VIRT_IRQCHIP_NUM_MSIS);
+ qemu_fdt_setprop_cells(mc->fdt, imsic_name, "riscv,ipi-id",
+ VIRT_IRQCHIP_IPI_MSI);
+ if (imsic_guest_bits) { /* property is omitted when the bit count is zero */
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,guest-index-bits",
+ imsic_guest_bits);
+ }
+ if (riscv_socket_count(mc) > 1) { /* same multi-socket-only properties as the M-level node */
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,hart-index-bits",
+ imsic_num_bits(imsic_max_hart_per_socket));
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,group-index-bits",
+ imsic_num_bits(riscv_socket_count(mc)));
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,group-index-shift",
+ IMSIC_MMIO_GROUP_MIN_SHIFT);
+ }
+ qemu_fdt_setprop_cell(mc->fdt, imsic_name, "phandle", *msi_s_phandle);
+ g_free(imsic_name);
+
+ g_free(imsic_regs);
+ g_free(imsic_cells);
+}
+
+static void create_fdt_socket_aplic(RISCVVirtState *s,
+ const MemMapEntry *memmap, int socket,
+ uint32_t msi_m_phandle,
+ uint32_t msi_s_phandle,
+ uint32_t *phandle,
+ uint32_t *intc_phandles,
+ uint32_t *aplic_phandles)
+{ /*
+   * Add the M-level and the S-level APLIC node of one socket to mc->fdt.
+   *
+   * Two phandles are taken from *phandle (which is advanced by two).
+   * @intc_phandles is indexed from 0 for the harts of this socket, so it
+   * must hold s->soc[socket].num_harts entries.  On return
+   * aplic_phandles[socket] holds the phandle of the S-level APLIC.
+   */
+ int cpu;
+ char *aplic_name;
+ uint32_t *aplic_cells;
+ unsigned long aplic_addr;
+ MachineState *mc = MACHINE(s);
+ uint32_t aplic_m_phandle, aplic_s_phandle;
+
+ aplic_m_phandle = (*phandle)++;
+ aplic_s_phandle = (*phandle)++;
+ aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); /* 2 cells per hart */
+
+ /*
+  * M-level APLIC node
+  *
+  * aplic_cells pairs each hart's interrupt controller phandle with
+  * IRQ_M_EXT; cells are stored big-endian.
+  */
+ for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
+ aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
+ aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT);
+ }
+ aplic_addr = memmap[VIRT_APLIC_M].base +
+ (memmap[VIRT_APLIC_M].size * socket);
+ aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
+ qemu_fdt_add_subnode(mc->fdt, aplic_name);
+ qemu_fdt_setprop_string(mc->fdt, aplic_name, "compatible", "riscv,aplic");
+ qemu_fdt_setprop_cell(mc->fdt, aplic_name,
+ "#interrupt-cells", FDT_APLIC_INT_CELLS);
+ qemu_fdt_setprop(mc->fdt, aplic_name, "interrupt-controller", NULL, 0);
+ if (s->aia_type == VIRT_AIA_TYPE_APLIC) { /* APLIC only: list the per-hart interrupts */
+ qemu_fdt_setprop(mc->fdt, aplic_name, "interrupts-extended",
+ aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2);
+ } else { /* otherwise reference msi_m_phandle as the MSI parent */
+ qemu_fdt_setprop_cell(mc->fdt, aplic_name, "msi-parent",
+ msi_m_phandle);
+ }
+ qemu_fdt_setprop_cells(mc->fdt, aplic_name, "reg",
+ 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_M].size);
+ qemu_fdt_setprop_cell(mc->fdt, aplic_name, "riscv,num-sources",
+ VIRT_IRQCHIP_NUM_SOURCES);
+ qemu_fdt_setprop_cell(mc->fdt, aplic_name, "riscv,children",
+ aplic_s_phandle); /* the S-level node created below */
+ qemu_fdt_setprop_cells(mc->fdt, aplic_name, "riscv,delegate",
+ aplic_s_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES);
+ riscv_socket_fdt_write_id(mc, mc->fdt, aplic_name, socket);
+ qemu_fdt_setprop_cell(mc->fdt, aplic_name, "phandle", aplic_m_phandle);
+ g_free(aplic_name);
+
+ /*
+  * S-level APLIC node
+  *
+  * aplic_cells is reused and overwritten with IRQ_S_EXT.  Unlike the
+  * M-level node, no "riscv,children" or "riscv,delegate" is written.
+  */
+ for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) {
+ aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]);
+ aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT);
+ }
+ aplic_addr = memmap[VIRT_APLIC_S].base +
+ (memmap[VIRT_APLIC_S].size * socket);
+ aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr);
+ qemu_fdt_add_subnode(mc->fdt, aplic_name);
+ qemu_fdt_setprop_string(mc->fdt, aplic_name, "compatible", "riscv,aplic");
+ qemu_fdt_setprop_cell(mc->fdt, aplic_name,
+ "#interrupt-cells", FDT_APLIC_INT_CELLS);
+ qemu_fdt_setprop(mc->fdt, aplic_name, "interrupt-controller", NULL, 0);
+ if (s->aia_type == VIRT_AIA_TYPE_APLIC) { /* APLIC only: list the per-hart interrupts */
+ qemu_fdt_setprop(mc->fdt, aplic_name, "interrupts-extended",
+ aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2);
+ } else { /* otherwise reference msi_s_phandle as the MSI parent */
+ qemu_fdt_setprop_cell(mc->fdt, aplic_name, "msi-parent",
+ msi_s_phandle);
+ }
+ qemu_fdt_setprop_cells(mc->fdt, aplic_name, "reg",
+ 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_S].size);
+ qemu_fdt_setprop_cell(mc->fdt, aplic_name, "riscv,num-sources",
+ VIRT_IRQCHIP_NUM_SOURCES);
+ riscv_socket_fdt_write_id(mc, mc->fdt, aplic_name, socket);
+ qemu_fdt_setprop_cell(mc->fdt, aplic_name, "phandle", aplic_s_phandle);
+ g_free(aplic_name);
+
+ g_free(aplic_cells);
+ aplic_phandles[socket] = aplic_s_phandle; /* reported back to the caller */
+}
+
static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
bool is_32_bit, uint32_t *phandle,
uint32_t *irq_mmio_phandle,
uint32_t *irq_pcie_phandle,
- uint32_t *irq_virtio_phandle)
+ uint32_t *irq_virtio_phandle,
+ uint32_t *msi_pcie_phandle)
{
- int socket;
char *clust_name;
- uint32_t *intc_phandles;
+ int socket, phandle_pos;
MachineState *mc = MACHINE(s);
- uint32_t xplic_phandles[MAX_NODES];
+ uint32_t msi_m_phandle = 0, msi_s_phandle = 0;
+ uint32_t *intc_phandles, xplic_phandles[MAX_NODES];
qemu_fdt_add_subnode(mc->fdt, "/cpus");
qemu_fdt_setprop_cell(mc->fdt, "/cpus", "timebase-frequency",
@@ -444,32 +707,55 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap,
qemu_fdt_setprop_cell(mc->fdt, "/cpus", "#address-cells", 0x1);
qemu_fdt_add_subnode(mc->fdt, "/cpus/cpu-map");
+ intc_phandles = g_new0(uint32_t, mc->smp.cpus);
+
+ phandle_pos = mc->smp.cpus;
for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) {
+ phandle_pos -= s->soc[socket].num_harts;
+
clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket);
qemu_fdt_add_subnode(mc->fdt, clust_name);
- intc_phandles = g_new0(uint32_t, s->soc[socket].num_harts);
-
create_fdt_socket_cpus(s, socket, clust_name, phandle,
- is_32_bit, intc_phandles);
+ is_32_bit, &intc_phandles[phandle_pos]);
create_fdt_socket_memory(s, memmap, socket);
+ g_free(clust_name);
+
if (!kvm_enabled()) {
if (s->have_aclint) {
- create_fdt_socket_aclint(s, memmap, socket, intc_phandles);
+ create_fdt_socket_aclint(s, memmap, socket,
+ &intc_phandles[phandle_pos]);
} else {
- create_fdt_socket_clint(s, memmap, socket, intc_phandles);
+ create_fdt_socket_clint(s, memmap, socket,
+ &intc_phandles[phandle_pos]);
}
}
+ }
- create_fdt_socket_plic(s, memmap, socket, phandle,
- intc_phandles, xplic_phandles);
+ if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
+ create_fdt_imsic(s, memmap, phandle, intc_phandles,
+ &msi_m_phandle, &msi_s_phandle);
+ *msi_pcie_phandle = msi_s_phandle;
+ }
- g_free(intc_phandles);
- g_free(clust_name);
+ phandle_pos = mc->smp.cpus;
+ for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) {
+ phandle_pos -= s->soc[socket].num_harts;
+
+ if (s->aia_type == VIRT_AIA_TYPE_NONE) {
+ create_fdt_socket_plic(s, memmap, socket, phandle,
+ &intc_phandles[phandle_pos], xplic_phandles);
+ } else {
+ create_fdt_socket_aplic(s, memmap, socket,
+ msi_m_phandle, msi_s_phandle, phandle,
+ &intc_phandles[phandle_pos], xplic_phandles);
+ }
}
+ g_free(intc_phandles);
+
for (socket = 0; socket < riscv_socket_count(mc); socket++) {
if (socket == 0) {
*irq_mmio_phandle = xplic_phandles[socket];
@@ -505,13 +791,20 @@ static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap,
0x0, memmap[VIRT_VIRTIO].size);
qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent",
irq_virtio_phandle);
- qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", VIRTIO_IRQ + i);
+ if (s->aia_type == VIRT_AIA_TYPE_NONE) {
+ qemu_fdt_setprop_cell(mc->fdt, name, "interrupts",
+ VIRTIO_IRQ + i);
+ } else {
+ qemu_fdt_setprop_cells(mc->fdt, name, "interrupts",
+ VIRTIO_IRQ + i, 0x4);
+ }
g_free(name);
}
}
static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap,
- uint32_t irq_pcie_phandle)
+ uint32_t irq_pcie_phandle,
+ uint32_t msi_pcie_phandle)
{
char *name;
MachineState *mc = MACHINE(s);
@@ -531,6 +824,9 @@ static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap,
qemu_fdt_setprop_cells(mc->fdt, name, "bus-range", 0,
memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1);
qemu_fdt_setprop(mc->fdt, name, "dma-coherent", NULL, 0);
+ if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
+ qemu_fdt_setprop_cell(mc->fdt, name, "msi-parent", msi_pcie_phandle);
+ }
qemu_fdt_setprop_cells(mc->fdt, name, "reg", 0,
memmap[VIRT_PCIE_ECAM].base, 0, memmap[VIRT_PCIE_ECAM].size);
qemu_fdt_setprop_sized_cells(mc->fdt, name, "ranges",
@@ -543,7 +839,7 @@ static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap,
2, virt_high_pcie_memmap.base,
2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.size);
- create_pcie_irq_map(mc->fdt, name, irq_pcie_phandle);
+ create_pcie_irq_map(s, mc->fdt, name, irq_pcie_phandle);
g_free(name);
}
@@ -602,7 +898,11 @@ static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap,
0x0, memmap[VIRT_UART0].size);
qemu_fdt_setprop_cell(mc->fdt, name, "clock-frequency", 3686400);
qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", irq_mmio_phandle);
- qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", UART0_IRQ);
+ if (s->aia_type == VIRT_AIA_TYPE_NONE) {
+ qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", UART0_IRQ);
+ } else {
+ qemu_fdt_setprop_cells(mc->fdt, name, "interrupts", UART0_IRQ, 0x4);
+ }
qemu_fdt_add_subnode(mc->fdt, "/chosen");
qemu_fdt_setprop_string(mc->fdt, "/chosen", "stdout-path", name);
@@ -623,7 +923,11 @@ static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap,
0x0, memmap[VIRT_RTC].base, 0x0, memmap[VIRT_RTC].size);
qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent",
irq_mmio_phandle);
- qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", RTC_IRQ);
+ if (s->aia_type == VIRT_AIA_TYPE_NONE) {
+ qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", RTC_IRQ);
+ } else {
+ qemu_fdt_setprop_cells(mc->fdt, name, "interrupts", RTC_IRQ, 0x4);
+ }
g_free(name);
}
@@ -648,7 +952,7 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
uint64_t mem_size, const char *cmdline, bool is_32_bit)
{
MachineState *mc = MACHINE(s);
- uint32_t phandle = 1, irq_mmio_phandle = 1;
+ uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1;
uint32_t irq_pcie_phandle = 1, irq_virtio_phandle = 1;
if (mc->dtb) {
@@ -678,11 +982,12 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
qemu_fdt_setprop_cell(mc->fdt, "/soc", "#address-cells", 0x2);
create_fdt_sockets(s, memmap, is_32_bit, &phandle,
- &irq_mmio_phandle, &irq_pcie_phandle, &irq_virtio_phandle);
+ &irq_mmio_phandle, &irq_pcie_phandle, &irq_virtio_phandle,
+ &msi_pcie_phandle);
create_fdt_virtio(s, memmap, irq_virtio_phandle);
- create_fdt_pcie(s, memmap, irq_pcie_phandle);
+ create_fdt_pcie(s, memmap, irq_pcie_phandle, msi_pcie_phandle);
create_fdt_reset(s, memmap, &phandle);
@@ -704,7 +1009,7 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
hwaddr high_mmio_base,
hwaddr high_mmio_size,
hwaddr pio_base,
- DeviceState *plic)
+ DeviceState *irqchip)
{
DeviceState *dev;
MemoryRegion *ecam_alias, *ecam_reg;
@@ -738,7 +1043,7 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base);
for (i = 0; i < GPEX_NUM_IRQS; i++) {
- irq = qdev_get_gpio_in(plic, PCIE_IRQ + i);
+ irq = qdev_get_gpio_in(irqchip, PCIE_IRQ + i);
sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq);
gpex_set_irq_num(GPEX_HOST(dev), i, PCIE_IRQ + i);
@@ -769,18 +1074,100 @@ static FWCfgState *create_fw_cfg(const MachineState *mc)
return fw_cfg;
}
+/*
+ * Create the PLIC of one socket and return the new device.
+ *
+ * The PLIC is placed at memmap[VIRT_PLIC].base + socket * memmap[VIRT_PLIC].size
+ * and is created for @hart_count harts starting at @base_hartid.
+ */
+static DeviceState *virt_create_plic(const MemMapEntry *memmap, int socket,
+                                     int base_hartid, int hart_count)
+{
+    const MemMapEntry *plic_map = &memmap[VIRT_PLIC];
+    /* Per-socket PLIC hart topology configuration string */
+    char *hart_config = riscv_plic_hart_config_string(hart_count);
+    /* Per-socket PLIC */
+    DeviceState *plic = sifive_plic_create(
+        plic_map->base + socket * plic_map->size,
+        hart_config, hart_count, base_hartid,
+        VIRT_IRQCHIP_NUM_SOURCES,
+        ((1U << VIRT_IRQCHIP_NUM_PRIO_BITS) - 1),
+        VIRT_PLIC_PRIORITY_BASE,
+        VIRT_PLIC_PENDING_BASE,
+        VIRT_PLIC_ENABLE_BASE,
+        VIRT_PLIC_ENABLE_STRIDE,
+        VIRT_PLIC_CONTEXT_BASE,
+        VIRT_PLIC_CONTEXT_STRIDE,
+        plic_map->size);
+
+    /* The configuration string is only needed while creating the device. */
+    g_free(hart_config);
+
+    return plic;
+}
+
+/*
+ * Create the AIA interrupt controllers of one socket and return the
+ * M-level APLIC device.
+ *
+ * With VIRT_AIA_TYPE_APLIC_IMSIC, an M-level and an S-level IMSIC are first
+ * created for each of the @hart_count harts, and both APLICs are then
+ * created in MSI mode with a hart base and hart count of zero.  For any
+ * other @aia_type only the APLICs are created, using @base_hartid and
+ * @hart_count.  The S-level APLIC is created only when creating the M-level
+ * APLIC returned a device.
+ */
+static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests,
+                                    const MemMapEntry *memmap, int socket,
+                                    int base_hartid, int hart_count)
+{
+    const bool msimode = (aia_type == VIRT_AIA_TYPE_APLIC_IMSIC);
+    const int aplic_hartid_base = msimode ? 0 : base_hartid;
+    const int aplic_hart_count = msimode ? 0 : hart_count;
+    DeviceState *aplic_m;
+
+    if (msimode) {
+        hwaddr imsic_base;
+        uint32_t guest_bits;
+        int hart;
+
+        /* Per-socket M-level IMSICs */
+        imsic_base = memmap[VIRT_IMSIC_M].base +
+                     socket * VIRT_IMSIC_GROUP_MAX_SIZE;
+        for (hart = 0; hart < hart_count; hart++) {
+            riscv_imsic_create(imsic_base + hart * IMSIC_HART_SIZE(0),
+                               base_hartid + hart, true, 1,
+                               VIRT_IRQCHIP_NUM_MSIS);
+        }
+
+        /* Per-socket S-level IMSICs */
+        guest_bits = imsic_num_bits(aia_guests + 1);
+        imsic_base = memmap[VIRT_IMSIC_S].base +
+                     socket * VIRT_IMSIC_GROUP_MAX_SIZE;
+        for (hart = 0; hart < hart_count; hart++) {
+            riscv_imsic_create(imsic_base +
+                               hart * IMSIC_HART_SIZE(guest_bits),
+                               base_hartid + hart, false, 1 + aia_guests,
+                               VIRT_IRQCHIP_NUM_MSIS);
+        }
+    }
+
+    /* Per-socket M-level APLIC */
+    aplic_m = riscv_aplic_create(
+        memmap[VIRT_APLIC_M].base + socket * memmap[VIRT_APLIC_M].size,
+        memmap[VIRT_APLIC_M].size,
+        aplic_hartid_base,
+        aplic_hart_count,
+        VIRT_IRQCHIP_NUM_SOURCES,
+        VIRT_IRQCHIP_NUM_PRIO_BITS,
+        msimode, true, NULL);
+    if (!aplic_m) {
+        return aplic_m;
+    }
+
+    /* Per-socket S-level APLIC, handed the M-level APLIC just created */
+    riscv_aplic_create(
+        memmap[VIRT_APLIC_S].base + socket * memmap[VIRT_APLIC_S].size,
+        memmap[VIRT_APLIC_S].size,
+        aplic_hartid_base,
+        aplic_hart_count,
+        VIRT_IRQCHIP_NUM_SOURCES,
+        VIRT_IRQCHIP_NUM_PRIO_BITS,
+        msimode, false, aplic_m);
+
+    return aplic_m;
+}
+
static void virt_machine_init(MachineState *machine)
{
const MemMapEntry *memmap = virt_memmap;
RISCVVirtState *s = RISCV_VIRT_MACHINE(machine);
MemoryRegion *system_memory = get_system_memory();
MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
- char *plic_hart_config, *soc_name;
+ char *soc_name;
target_ulong start_addr = memmap[VIRT_DRAM].base;
target_ulong firmware_end_addr, kernel_start_addr;
uint32_t fdt_load_addr;
uint64_t kernel_entry;
- DeviceState *mmio_plic, *virtio_plic, *pcie_plic;
+ DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip;
int i, base_hartid, hart_count;
/* Check socket count limit */
@@ -791,7 +1178,7 @@ static void virt_machine_init(MachineState *machine)
}
/* Initialize sockets */
- mmio_plic = virtio_plic = pcie_plic = NULL;
+ mmio_irqchip = virtio_irqchip = pcie_irqchip = NULL;
for (i = 0; i < riscv_socket_count(machine); i++) {
if (!riscv_socket_check_hartids(machine, i)) {
error_report("discontinuous hartids in socket%d", i);
@@ -823,56 +1210,68 @@ static void virt_machine_init(MachineState *machine)
sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_abort);
if (!kvm_enabled()) {
- /* Per-socket CLINT */
- riscv_aclint_swi_create(
- memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size,
- base_hartid, hart_count, false);
- riscv_aclint_mtimer_create(
- memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size +
- RISCV_ACLINT_SWI_SIZE,
- RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count,
- RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME,
- RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
-
- /* Per-socket ACLINT SSWI */
if (s->have_aclint) {
+ if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
+ /* Per-socket ACLINT MTIMER */
+ riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base +
+ i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
+ RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
+ base_hartid, hart_count,
+ RISCV_ACLINT_DEFAULT_MTIMECMP,
+ RISCV_ACLINT_DEFAULT_MTIME,
+ RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
+ } else {
+ /* Per-socket ACLINT MSWI, MTIMER, and SSWI */
+ riscv_aclint_swi_create(memmap[VIRT_CLINT].base +
+ i * memmap[VIRT_CLINT].size,
+ base_hartid, hart_count, false);
+ riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base +
+ i * memmap[VIRT_CLINT].size +
+ RISCV_ACLINT_SWI_SIZE,
+ RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
+ base_hartid, hart_count,
+ RISCV_ACLINT_DEFAULT_MTIMECMP,
+ RISCV_ACLINT_DEFAULT_MTIME,
+ RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
+ riscv_aclint_swi_create(memmap[VIRT_ACLINT_SSWI].base +
+ i * memmap[VIRT_ACLINT_SSWI].size,
+ base_hartid, hart_count, true);
+ }
+ } else {
+ /* Per-socket SiFive CLINT */
riscv_aclint_swi_create(
- memmap[VIRT_ACLINT_SSWI].base +
- i * memmap[VIRT_ACLINT_SSWI].size,
- base_hartid, hart_count, true);
+ memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size,
+ base_hartid, hart_count, false);
+ riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base +
+ i * memmap[VIRT_CLINT].size + RISCV_ACLINT_SWI_SIZE,
+ RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count,
+ RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME,
+ RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true);
}
}
- /* Per-socket PLIC hart topology configuration string */
- plic_hart_config = riscv_plic_hart_config_string(hart_count);
-
- /* Per-socket PLIC */
- s->plic[i] = sifive_plic_create(
- memmap[VIRT_PLIC].base + i * memmap[VIRT_PLIC].size,
- plic_hart_config, hart_count, base_hartid,
- VIRT_PLIC_NUM_SOURCES,
- VIRT_PLIC_NUM_PRIORITIES,
- VIRT_PLIC_PRIORITY_BASE,
- VIRT_PLIC_PENDING_BASE,
- VIRT_PLIC_ENABLE_BASE,
- VIRT_PLIC_ENABLE_STRIDE,
- VIRT_PLIC_CONTEXT_BASE,
- VIRT_PLIC_CONTEXT_STRIDE,
- memmap[VIRT_PLIC].size);
- g_free(plic_hart_config);
+ /* Per-socket interrupt controller */
+ if (s->aia_type == VIRT_AIA_TYPE_NONE) {
+ s->irqchip[i] = virt_create_plic(memmap, i,
+ base_hartid, hart_count);
+ } else {
+ s->irqchip[i] = virt_create_aia(s->aia_type, s->aia_guests,
+ memmap, i, base_hartid,
+ hart_count);
+ }
- /* Try to use different PLIC instance based device type */
+ /* Try to use different IRQCHIP instance based device type */
if (i == 0) {
- mmio_plic = s->plic[i];
- virtio_plic = s->plic[i];
- pcie_plic = s->plic[i];
+ mmio_irqchip = s->irqchip[i];
+ virtio_irqchip = s->irqchip[i];
+ pcie_irqchip = s->irqchip[i];
}
if (i == 1) {
- virtio_plic = s->plic[i];
- pcie_plic = s->plic[i];
+ virtio_irqchip = s->irqchip[i];
+ pcie_irqchip = s->irqchip[i];
}
if (i == 2) {
- pcie_plic = s->plic[i];
+ pcie_irqchip = s->irqchip[i];
}
}
@@ -990,7 +1389,7 @@ static void virt_machine_init(MachineState *machine)
for (i = 0; i < VIRTIO_COUNT; i++) {
sysbus_create_simple("virtio-mmio",
memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size,
- qdev_get_gpio_in(DEVICE(virtio_plic), VIRTIO_IRQ + i));
+ qdev_get_gpio_in(DEVICE(virtio_irqchip), VIRTIO_IRQ + i));
}
gpex_pcie_init(system_memory,
@@ -1001,14 +1400,14 @@ static void virt_machine_init(MachineState *machine)
virt_high_pcie_memmap.base,
virt_high_pcie_memmap.size,
memmap[VIRT_PCIE_PIO].base,
- DEVICE(pcie_plic));
+ DEVICE(pcie_irqchip));
serial_mm_init(system_memory, memmap[VIRT_UART0].base,
- 0, qdev_get_gpio_in(DEVICE(mmio_plic), UART0_IRQ), 399193,
+ 0, qdev_get_gpio_in(DEVICE(mmio_irqchip), UART0_IRQ), 399193,
serial_hd(0), DEVICE_LITTLE_ENDIAN);
sysbus_create_simple("goldfish_rtc", memmap[VIRT_RTC].base,
- qdev_get_gpio_in(DEVICE(mmio_plic), RTC_IRQ));
+ qdev_get_gpio_in(DEVICE(mmio_irqchip), RTC_IRQ));
virt_flash_create(s);
@@ -1024,6 +1423,64 @@ static void virt_machine_instance_init(Object *obj)
{
}
+/*
+ * Property getter for "aia-guests".
+ *
+ * Returns s->aia_guests formatted as a decimal string in newly allocated
+ * memory.  @errp is not used.
+ */
+static char *virt_get_aia_guests(Object *obj, Error **errp)
+{
+    RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
+
+    /*
+     * Format straight into an allocated string instead of going through a
+     * fixed-size stack buffer with an unbounded sprintf().
+     */
+    return g_strdup_printf("%d", s->aia_guests);
+}
+
+/*
+ * Property setter for "aia-guests".
+ *
+ * Parses @val as a decimal number and stores it in s->aia_guests.  The value
+ * is validated before it is stored, so a rejected setting leaves
+ * s->aia_guests unchanged.  Input that is not a complete decimal number, or
+ * that lies outside 0..VIRT_IRQCHIP_MAX_GUESTS, is reported through @errp.
+ */
+static void virt_set_aia_guests(Object *obj, const char *val, Error **errp)
+{
+    RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
+    char *end;
+    long guests;
+
+    /*
+     * strtol() rather than atoi(): atoi() cannot report malformed input and
+     * its behaviour is undefined when the value does not fit in an int.
+     * An out-of-range input makes strtol() return LONG_MIN or LONG_MAX,
+     * which the range check below rejects as well.
+     */
+    guests = strtol(val, &end, 10);
+    if (end == val || *end != '\0' ||
+        guests < 0 || guests > VIRT_IRQCHIP_MAX_GUESTS) {
+        error_setg(errp, "Invalid number of AIA IMSIC guests");
+        error_append_hint(errp, "Valid values be between 0 and %d.\n",
+                          VIRT_IRQCHIP_MAX_GUESTS);
+        return;
+    }
+
+    s->aia_guests = guests;
+}
+
+/*
+ * Property getter for "aia".
+ *
+ * Returns the name of s->aia_type ("aplic", "aplic-imsic", or "none" for any
+ * other value) in newly allocated memory.  @errp is not used.
+ */
+static char *virt_get_aia(Object *obj, Error **errp)
+{
+    RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
+
+    switch (s->aia_type) {
+    case VIRT_AIA_TYPE_APLIC:
+        return g_strdup("aplic");
+    case VIRT_AIA_TYPE_APLIC_IMSIC:
+        return g_strdup("aplic-imsic");
+    default:
+        return g_strdup("none");
+    }
+}
+
+/*
+ * Property setter for "aia".
+ *
+ * Maps @val ("none", "aplic" or "aplic-imsic") to s->aia_type.  Any other
+ * string leaves s->aia_type unchanged and is reported through @errp.
+ */
+static void virt_set_aia(Object *obj, const char *val, Error **errp)
+{
+    RISCVVirtState *s = RISCV_VIRT_MACHINE(obj);
+
+    if (strcmp(val, "none") == 0) {
+        s->aia_type = VIRT_AIA_TYPE_NONE;
+        return;
+    }
+    if (strcmp(val, "aplic") == 0) {
+        s->aia_type = VIRT_AIA_TYPE_APLIC;
+        return;
+    }
+    if (strcmp(val, "aplic-imsic") == 0) {
+        s->aia_type = VIRT_AIA_TYPE_APLIC_IMSIC;
+        return;
+    }
+
+    /* Not one of the known names */
+    error_setg(errp, "Invalid AIA interrupt controller type");
+    error_append_hint(errp, "Valid values are none, aplic, and "
+                      "aplic-imsic.\n");
+}
+
static bool virt_get_aclint(Object *obj, Error **errp)
{
MachineState *ms = MACHINE(obj);
@@ -1042,6 +1499,7 @@ static void virt_set_aclint(Object *obj, bool value, Error **errp)
static void virt_machine_class_init(ObjectClass *oc, void *data)
{
+ char str[128];
MachineClass *mc = MACHINE_CLASS(oc);
mc->desc = "RISC-V VirtIO board";
@@ -1062,6 +1520,20 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
object_class_property_set_description(oc, "aclint",
"Set on/off to enable/disable "
"emulating ACLINT devices");
+
+ object_class_property_add_str(oc, "aia", virt_get_aia,
+ virt_set_aia);
+ object_class_property_set_description(oc, "aia",
+ "Set type of AIA interrupt "
+ "conttoller. Valid values are "
+ "none, aplic, and aplic-imsic.");
+
+ object_class_property_add_str(oc, "aia-guests",
+ virt_get_aia_guests,
+ virt_set_aia_guests);
+ sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid value "
+ "should be between 0 and %d.", VIRT_IRQCHIP_MAX_GUESTS);
+ object_class_property_set_description(oc, "aia-guests", str);
}
static const TypeInfo virt_machine_typeinfo = {
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 3666b8d946..072686ed58 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -26,6 +26,7 @@
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qemu/hw-version.h"
+#include "qemu/memalign.h"
#include "hw/scsi/scsi.h"
#include "migration/qemu-file-types.h"
#include "migration/vmstate.h"
diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c
index 6dbb9f41e4..c89ac53e65 100644
--- a/hw/tpm/tpm_ppi.c
+++ b/hw/tpm/tpm_ppi.c
@@ -12,7 +12,7 @@
*/
#include "qemu/osdep.h"
-
+#include "qemu/memalign.h"
#include "qapi/error.h"
#include "sysemu/memory_mapping.h"
#include "migration/vmstate.h"
diff --git a/hw/usb/dev-mtp.c b/hw/usb/dev-mtp.c
index 1e6ac76bef..e6b77a2a94 100644
--- a/hw/usb/dev-mtp.c
+++ b/hw/usb/dev-mtp.c
@@ -1607,7 +1607,7 @@ static void usb_mtp_write_data(MTPState *s, uint32_t handle)
usb_mtp_object_lookup(s, s->dataset.parent_handle);
char *path = NULL;
uint64_t rc;
- mode_t mask = 0644;
+ mode_t mask = 0755;
int ret = 0;
assert(d != NULL);
@@ -1635,7 +1635,7 @@ static void usb_mtp_write_data(MTPState *s, uint32_t handle)
}
d->fd = open(path, O_CREAT | O_WRONLY |
- O_CLOEXEC | O_NOFOLLOW, mask);
+ O_CLOEXEC | O_NOFOLLOW, mask & 0666);
if (d->fd == -1) {
ret = 1;
goto done;
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index a93d6b2e98..895b29fb86 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -58,8 +58,6 @@ struct ohci_hcca {
#define ED_WBACK_OFFSET offsetof(struct ohci_ed, head)
#define ED_WBACK_SIZE 4
-static void ohci_async_cancel_device(OHCIState *ohci, USBDevice *dev);
-
/* Bitfields for the first word of an Endpoint Desciptor. */
#define OHCI_ED_FA_SHIFT 0
#define OHCI_ED_FA_MASK (0x7f<<OHCI_ED_FA_SHIFT)
@@ -261,92 +259,6 @@ static inline void ohci_set_interrupt(OHCIState *ohci, uint32_t intr)
ohci_intr_update(ohci);
}
-/* Attach or detach a device on a root hub port. */
-static void ohci_attach(USBPort *port1)
-{
- OHCIState *s = port1->opaque;
- OHCIPort *port = &s->rhport[port1->index];
- uint32_t old_state = port->ctrl;
-
- /* set connect status */
- port->ctrl |= OHCI_PORT_CCS | OHCI_PORT_CSC;
-
- /* update speed */
- if (port->port.dev->speed == USB_SPEED_LOW) {
- port->ctrl |= OHCI_PORT_LSDA;
- } else {
- port->ctrl &= ~OHCI_PORT_LSDA;
- }
-
- /* notify of remote-wakeup */
- if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) {
- ohci_set_interrupt(s, OHCI_INTR_RD);
- }
-
- trace_usb_ohci_port_attach(port1->index);
-
- if (old_state != port->ctrl) {
- ohci_set_interrupt(s, OHCI_INTR_RHSC);
- }
-}
-
-static void ohci_detach(USBPort *port1)
-{
- OHCIState *s = port1->opaque;
- OHCIPort *port = &s->rhport[port1->index];
- uint32_t old_state = port->ctrl;
-
- ohci_async_cancel_device(s, port1->dev);
-
- /* set connect status */
- if (port->ctrl & OHCI_PORT_CCS) {
- port->ctrl &= ~OHCI_PORT_CCS;
- port->ctrl |= OHCI_PORT_CSC;
- }
- /* disable port */
- if (port->ctrl & OHCI_PORT_PES) {
- port->ctrl &= ~OHCI_PORT_PES;
- port->ctrl |= OHCI_PORT_PESC;
- }
- trace_usb_ohci_port_detach(port1->index);
-
- if (old_state != port->ctrl) {
- ohci_set_interrupt(s, OHCI_INTR_RHSC);
- }
-}
-
-static void ohci_wakeup(USBPort *port1)
-{
- OHCIState *s = port1->opaque;
- OHCIPort *port = &s->rhport[port1->index];
- uint32_t intr = 0;
- if (port->ctrl & OHCI_PORT_PSS) {
- trace_usb_ohci_port_wakeup(port1->index);
- port->ctrl |= OHCI_PORT_PSSC;
- port->ctrl &= ~OHCI_PORT_PSS;
- intr = OHCI_INTR_RHSC;
- }
- /* Note that the controller can be suspended even if this port is not */
- if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) {
- trace_usb_ohci_remote_wakeup(s->name);
- /* This is the one state transition the controller can do by itself */
- s->ctl &= ~OHCI_CTL_HCFS;
- s->ctl |= OHCI_USB_RESUME;
- /* In suspend mode only ResumeDetected is possible, not RHSC:
- * see the OHCI spec 5.1.2.3.
- */
- intr = OHCI_INTR_RD;
- }
- ohci_set_interrupt(s, intr);
-}
-
-static void ohci_child_detach(USBPort *port1, USBDevice *child)
-{
- OHCIState *s = port1->opaque;
-
- ohci_async_cancel_device(s, child);
-}
-
static USBDevice *ohci_find_device(OHCIState *ohci, uint8_t addr)
{
USBDevice *dev;
@@ -369,6 +281,10 @@ void ohci_stop_endpoints(OHCIState *ohci)
USBDevice *dev;
int i, j;
+ if (ohci->async_td) {
+ usb_cancel_packet(&ohci->usb_packet);
+ ohci->async_td = 0;
+ }
for (i = 0; i < ohci->num_ports; i++) {
dev = ohci->rhport[i].port.dev;
if (dev && dev->attached) {
@@ -398,10 +314,6 @@ static void ohci_roothub_reset(OHCIState *ohci)
usb_port_reset(&port->port);
}
}
- if (ohci->async_td) {
- usb_cancel_packet(&ohci->usb_packet);
- ohci->async_td = 0;
- }
ohci_stop_endpoints(ohci);
}
@@ -634,21 +546,9 @@ static int ohci_copy_iso_td(OHCIState *ohci,
return 0;
}
-static void ohci_process_lists(OHCIState *ohci, int completion);
-
-static void ohci_async_complete_packet(USBPort *port, USBPacket *packet)
-{
- OHCIState *ohci = container_of(packet, OHCIState, usb_packet);
-
- trace_usb_ohci_async_complete();
- ohci->async_complete = true;
- ohci_process_lists(ohci, 1);
-}
-
#define USUB(a, b) ((int16_t)((uint16_t)(a) - (uint16_t)(b)))
-static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
- int completion)
+static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed)
{
int dir;
size_t len = 0;
@@ -658,6 +558,9 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
int i;
USBDevice *dev;
USBEndpoint *ep;
+ USBPacket *pkt;
+ uint8_t buf[8192];
+ bool int_req;
struct ohci_iso_td iso_td;
uint32_t addr;
uint16_t starting_frame;
@@ -792,40 +695,42 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
} else {
len = end_addr - start_addr + 1;
}
- if (len > sizeof(ohci->usb_buf)) {
- len = sizeof(ohci->usb_buf);
+ if (len > sizeof(buf)) {
+ len = sizeof(buf);
}
if (len && dir != OHCI_TD_DIR_IN) {
- if (ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, len,
+ if (ohci_copy_iso_td(ohci, start_addr, end_addr, buf, len,
DMA_DIRECTION_TO_DEVICE)) {
ohci_die(ohci);
return 1;
}
}
- if (!completion) {
- bool int_req = relative_frame_number == frame_count &&
- OHCI_BM(iso_td.flags, TD_DI) == 0;
- dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA));
- if (dev == NULL) {
- trace_usb_ohci_td_dev_error();
- return 1;
- }
- ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN));
- usb_packet_setup(&ohci->usb_packet, pid, ep, 0, addr, false, int_req);
- usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, len);
- usb_handle_packet(dev, &ohci->usb_packet);
- if (ohci->usb_packet.status == USB_RET_ASYNC) {
- usb_device_flush_ep_queue(dev, ep);
- return 1;
- }
+ dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA));
+ if (dev == NULL) {
+ trace_usb_ohci_td_dev_error();
+ return 1;
}
- if (ohci->usb_packet.status == USB_RET_SUCCESS) {
- ret = ohci->usb_packet.actual_length;
+ ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN));
+ pkt = g_new0(USBPacket, 1);
+ usb_packet_init(pkt);
+ int_req = relative_frame_number == frame_count &&
+ OHCI_BM(iso_td.flags, TD_DI) == 0;
+ usb_packet_setup(pkt, pid, ep, 0, addr, false, int_req);
+ usb_packet_addbuf(pkt, buf, len);
+ usb_handle_packet(dev, pkt);
+ if (pkt->status == USB_RET_ASYNC) {
+ usb_device_flush_ep_queue(dev, ep);
+ g_free(pkt);
+ return 1;
+ }
+ if (pkt->status == USB_RET_SUCCESS) {
+ ret = pkt->actual_length;
} else {
- ret = ohci->usb_packet.status;
+ ret = pkt->status;
}
+ g_free(pkt);
trace_usb_ohci_iso_td_so(start_offset, end_offset, start_addr, end_addr,
str, len, ret);
@@ -833,7 +738,7 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
/* Writeback */
if (dir == OHCI_TD_DIR_IN && ret >= 0 && ret <= len) {
/* IN transfer succeeded */
- if (ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, ret,
+ if (ohci_copy_iso_td(ohci, start_addr, end_addr, buf, ret,
DMA_DIRECTION_FROM_DEVICE)) {
ohci_die(ohci);
return 1;
@@ -1033,21 +938,21 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
ohci->async_td = 0;
ohci->async_complete = false;
} else {
+ dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA));
+ if (dev == NULL) {
+ trace_usb_ohci_td_dev_error();
+ return 1;
+ }
+ ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN));
if (ohci->async_td) {
/* ??? The hardware should allow one active packet per
endpoint. We only allow one active packet per controller.
This should be sufficient as long as devices respond in a
timely manner.
*/
- trace_usb_ohci_td_too_many_pending();
+ trace_usb_ohci_td_too_many_pending(ep->nr);
return 1;
}
- dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA));
- if (dev == NULL) {
- trace_usb_ohci_td_dev_error();
- return 1;
- }
- ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN));
usb_packet_setup(&ohci->usb_packet, pid, ep, 0, addr, !flag_r,
OHCI_BM(td.flags, TD_DI) == 0);
usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, pktlen);
@@ -1156,7 +1061,7 @@ exit_no_retire:
}
/* Service an endpoint list. Returns nonzero if active TD were found. */
-static int ohci_service_ed_list(OHCIState *ohci, uint32_t head, int completion)
+static int ohci_service_ed_list(OHCIState *ohci, uint32_t head)
{
struct ohci_ed ed;
uint32_t next_ed;
@@ -1207,8 +1112,9 @@ static int ohci_service_ed_list(OHCIState *ohci, uint32_t head, int completion)
break;
} else {
/* Handle isochronous endpoints */
- if (ohci_service_iso_td(ohci, &ed, completion))
+ if (ohci_service_iso_td(ohci, &ed)) {
break;
+ }
}
}
@@ -1235,20 +1141,20 @@ static void ohci_sof(OHCIState *ohci)
}
/* Process Control and Bulk lists. */
-static void ohci_process_lists(OHCIState *ohci, int completion)
+static void ohci_process_lists(OHCIState *ohci)
{
if ((ohci->ctl & OHCI_CTL_CLE) && (ohci->status & OHCI_STATUS_CLF)) {
if (ohci->ctrl_cur && ohci->ctrl_cur != ohci->ctrl_head) {
trace_usb_ohci_process_lists(ohci->ctrl_head, ohci->ctrl_cur);
}
- if (!ohci_service_ed_list(ohci, ohci->ctrl_head, completion)) {
+ if (!ohci_service_ed_list(ohci, ohci->ctrl_head)) {
ohci->ctrl_cur = 0;
ohci->status &= ~OHCI_STATUS_CLF;
}
}
if ((ohci->ctl & OHCI_CTL_BLE) && (ohci->status & OHCI_STATUS_BLF)) {
- if (!ohci_service_ed_list(ohci, ohci->bulk_head, completion)) {
+ if (!ohci_service_ed_list(ohci, ohci->bulk_head)) {
ohci->bulk_cur = 0;
ohci->status &= ~OHCI_STATUS_BLF;
}
@@ -1272,19 +1178,15 @@ static void ohci_frame_boundary(void *opaque)
int n;
n = ohci->frame_number & 0x1f;
- ohci_service_ed_list(ohci, le32_to_cpu(hcca.intr[n]), 0);
+ ohci_service_ed_list(ohci, le32_to_cpu(hcca.intr[n]));
}
/* Cancel all pending packets if either of the lists has been disabled. */
if (ohci->old_ctl & (~ohci->ctl) & (OHCI_CTL_BLE | OHCI_CTL_CLE)) {
- if (ohci->async_td) {
- usb_cancel_packet(&ohci->usb_packet);
- ohci->async_td = 0;
- }
ohci_stop_endpoints(ohci);
}
ohci->old_ctl = ohci->ctl;
- ohci_process_lists(ohci, 0);
+ ohci_process_lists(ohci);
/* Stop if UnrecoverableError happened or ohci_sof will crash */
if (ohci->intr_status & OHCI_INTR_UE) {
@@ -1793,8 +1695,45 @@ static void ohci_mem_write(void *opaque,
}
}
-static void ohci_async_cancel_device(OHCIState *ohci, USBDevice *dev)
+static const MemoryRegionOps ohci_mem_ops = {
+ .read = ohci_mem_read,
+ .write = ohci_mem_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/* USBPortOps */
+static void ohci_attach(USBPort *port1)
{
+ OHCIState *s = port1->opaque;
+ OHCIPort *port = &s->rhport[port1->index];
+ uint32_t old_state = port->ctrl;
+
+ /* set connect status */
+ port->ctrl |= OHCI_PORT_CCS | OHCI_PORT_CSC;
+
+ /* update speed */
+ if (port->port.dev->speed == USB_SPEED_LOW) {
+ port->ctrl |= OHCI_PORT_LSDA;
+ } else {
+ port->ctrl &= ~OHCI_PORT_LSDA;
+ }
+
+ /* notify of remote-wakeup */
+ if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) {
+ ohci_set_interrupt(s, OHCI_INTR_RD);
+ }
+
+ trace_usb_ohci_port_attach(port1->index);
+
+ if (old_state != port->ctrl) {
+ ohci_set_interrupt(s, OHCI_INTR_RHSC);
+ }
+}
+
+static void ohci_child_detach(USBPort *port1, USBDevice *dev)
+{
+ OHCIState *ohci = port1->opaque;
+
if (ohci->async_td &&
usb_packet_is_inflight(&ohci->usb_packet) &&
ohci->usb_packet.ep->dev == dev) {
@@ -1803,11 +1742,65 @@ static void ohci_async_cancel_device(OHCIState *ohci, USBDevice *dev)
}
}
-static const MemoryRegionOps ohci_mem_ops = {
- .read = ohci_mem_read,
- .write = ohci_mem_write,
- .endianness = DEVICE_LITTLE_ENDIAN,
-};
+static void ohci_detach(USBPort *port1)
+{
+ OHCIState *s = port1->opaque;
+ OHCIPort *port = &s->rhport[port1->index];
+ uint32_t old_state = port->ctrl;
+
+ ohci_child_detach(port1, port1->dev);
+
+ /* set connect status */
+ if (port->ctrl & OHCI_PORT_CCS) {
+ port->ctrl &= ~OHCI_PORT_CCS;
+ port->ctrl |= OHCI_PORT_CSC;
+ }
+ /* disable port */
+ if (port->ctrl & OHCI_PORT_PES) {
+ port->ctrl &= ~OHCI_PORT_PES;
+ port->ctrl |= OHCI_PORT_PESC;
+ }
+ trace_usb_ohci_port_detach(port1->index);
+
+ if (old_state != port->ctrl) {
+ ohci_set_interrupt(s, OHCI_INTR_RHSC);
+ }
+}
+
+static void ohci_wakeup(USBPort *port1)
+{
+ OHCIState *s = port1->opaque;
+ OHCIPort *port = &s->rhport[port1->index];
+ uint32_t intr = 0;
+ if (port->ctrl & OHCI_PORT_PSS) {
+ trace_usb_ohci_port_wakeup(port1->index);
+ port->ctrl |= OHCI_PORT_PSSC;
+ port->ctrl &= ~OHCI_PORT_PSS;
+ intr = OHCI_INTR_RHSC;
+ }
+ /* Note that the controller can be suspended even if this port is not */
+ if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) {
+ trace_usb_ohci_remote_wakeup(s->name);
+ /* This is the one state transition the controller can do by itself */
+ s->ctl &= ~OHCI_CTL_HCFS;
+ s->ctl |= OHCI_USB_RESUME;
+ /*
+ * In suspend mode only ResumeDetected is possible, not RHSC:
+ * see the OHCI spec 5.1.2.3.
+ */
+ intr = OHCI_INTR_RD;
+ }
+ ohci_set_interrupt(s, intr);
+}
+
+static void ohci_async_complete_packet(USBPort *port, USBPacket *packet)
+{
+ OHCIState *ohci = container_of(packet, OHCIState, usb_packet);
+
+ trace_usb_ohci_async_complete();
+ ohci->async_complete = true;
+ ohci_process_lists(ohci);
+}
static USBPortOps ohci_port_ops = {
.attach = ohci_attach,
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 14bdb89676..0cd0a5e540 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -2523,7 +2523,7 @@ static void xhci_process_commands(XHCIState *xhci)
case CR_VENDOR_NEC_FIRMWARE_REVISION:
if (xhci->nec_quirks) {
event.type = 48; /* NEC reply */
- event.length = 0x3025;
+ event.length = 0x3034;
} else {
event.ccode = CC_TRB_ERROR;
}
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 5f0ef9cb3b..8692ea2561 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -1239,7 +1239,11 @@ static void usbredir_create_parser(USBRedirDevice *dev)
DPRINTF("creating usbredirparser\n");
- dev->parser = qemu_oom_check(usbredirparser_create());
+ dev->parser = usbredirparser_create();
+ if (!dev->parser) {
+ error_report("usbredirparser_create() failed");
+ exit(1);
+ }
dev->parser->priv = dev;
dev->parser->log_func = usbredir_log;
dev->parser->read_func = usbredir_read;
@@ -2239,7 +2243,10 @@ static int usbredir_put_parser(QEMUFile *f, void *priv, size_t unused,
}
usbredirparser_serialize(dev->parser, &data, &len);
- qemu_oom_check(data);
+ if (!data) {
+ error_report("usbredirparser_serialize failed");
+ exit(1);
+ }
qemu_put_be32(f, len);
qemu_put_buffer(f, data, len);
@@ -2330,7 +2337,11 @@ static int usbredir_get_bufpq(QEMUFile *f, void *priv, size_t unused,
bufp->len = qemu_get_be32(f);
bufp->status = qemu_get_be32(f);
bufp->offset = 0;
- bufp->data = qemu_oom_check(malloc(bufp->len)); /* regular malloc! */
+ bufp->data = malloc(bufp->len); /* regular malloc! */
+ if (!bufp->data) {
+ error_report("usbredir_get_bufpq: out of memory");
+ exit(1);
+ }
bufp->free_on_destroy = bufp->data;
qemu_get_buffer(f, bufp->data, bufp->len);
QTAILQ_INSERT_TAIL(&endp->bufpq, bufp, next);
diff --git a/hw/usb/trace-events b/hw/usb/trace-events
index b8287b63f1..9773cb5330 100644
--- a/hw/usb/trace-events
+++ b/hw/usb/trace-events
@@ -51,7 +51,7 @@ usb_ohci_td_skip_async(void) ""
usb_ohci_td_pkt_hdr(uint32_t addr, int64_t pktlen, int64_t len, const char *s, int flag_r, uint32_t cbp, uint32_t be) " TD @ 0x%.8x %" PRId64 " of %" PRId64 " bytes %s r=%d cbp=0x%.8x be=0x%.8x"
usb_ohci_td_pkt_short(const char *dir, const char *buf) "%s data: %s"
usb_ohci_td_pkt_full(const char *dir, const char *buf) "%s data: %s"
-usb_ohci_td_too_many_pending(void) ""
+usb_ohci_td_too_many_pending(int ep) "ep=%d"
usb_ohci_td_packet_status(int status) "status=%d"
usb_ohci_ed_read_error(uint32_t addr) "ED read error at 0x%x"
usb_ohci_ed_pkt(uint32_t cur, int h, int c, uint32_t head, uint32_t tail, uint32_t next) "ED @ 0x%.8x h=%u c=%u\n head=0x%.8x tailp=0x%.8x next=0x%.8x"
diff --git a/include/block/block-common.h b/include/block/block-common.h
new file mode 100644
index 0000000000..fdb7306e78
--- /dev/null
+++ b/include/block/block-common.h
@@ -0,0 +1,419 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_COMMON_H
+#define BLOCK_COMMON_H
+
+#include "block/aio.h"
+#include "block/aio-wait.h"
+#include "qemu/iov.h"
+#include "qemu/coroutine.h"
+#include "block/accounting.h"
+#include "block/dirty-bitmap.h"
+#include "block/blockjob.h"
+#include "qemu/hbitmap.h"
+#include "qemu/transactions.h"
+
+/*
+ * generated_co_wrapper
+ *
+ * Function specifier, which does nothing but mark functions to be
+ * generated by scripts/block-coroutine-wrapper.py
+ *
+ * Read more in docs/devel/block-coroutine-wrapper.rst
+ */
+#define generated_co_wrapper
+
+/* block.c */
+typedef struct BlockDriver BlockDriver;
+typedef struct BdrvChild BdrvChild;
+typedef struct BdrvChildClass BdrvChildClass;
+
+typedef struct BlockDriverInfo {
+ /* in bytes, 0 if irrelevant */
+ int cluster_size;
+ /* offset at which the VM state can be saved (0 if not possible) */
+ int64_t vm_state_offset;
+ bool is_dirty;
+ /*
+ * True if this block driver only supports compressed writes
+ */
+ bool needs_compressed_writes;
+} BlockDriverInfo;
+
+typedef struct BlockFragInfo {
+ uint64_t allocated_clusters;
+ uint64_t total_clusters;
+ uint64_t fragmented_clusters;
+ uint64_t compressed_clusters;
+} BlockFragInfo;
+
+typedef enum {
+ BDRV_REQ_COPY_ON_READ = 0x1,
+ BDRV_REQ_ZERO_WRITE = 0x2,
+
+ /*
+ * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
+ * that the block driver should unmap (discard) blocks if it is guaranteed
+ * that the result will read back as zeroes. The flag is only passed to the
+ * driver if the block device is opened with BDRV_O_UNMAP.
+ */
+ BDRV_REQ_MAY_UNMAP = 0x4,
+
+ BDRV_REQ_FUA = 0x10,
+ BDRV_REQ_WRITE_COMPRESSED = 0x20,
+
+ /*
+ * Signifies that this write request will not change the visible disk
+ * content.
+ */
+ BDRV_REQ_WRITE_UNCHANGED = 0x40,
+
+ /*
+ * Forces request serialisation. Use only with write requests.
+ */
+ BDRV_REQ_SERIALISING = 0x80,
+
+ /*
+ * Execute the request only if the operation can be offloaded or otherwise
+ * be executed efficiently, but return an error instead of using a slow
+ * fallback.
+ */
+ BDRV_REQ_NO_FALLBACK = 0x100,
+
+ /*
+ * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
+ * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
+ * filter is involved), in which case it signals that the COR operation
+ * need not read the data into memory (qiov) but only ensure they are
+ * copied to the top layer (i.e., that COR operation is done).
+ */
+ BDRV_REQ_PREFETCH = 0x200,
+
+ /*
+ * If we need to wait for other requests, just fail immediately. Used
+ * only together with BDRV_REQ_SERIALISING. Used only with requests aligned
+ * to request_alignment (corresponding assertions are in block/io.c).
+ */
+ BDRV_REQ_NO_WAIT = 0x400,
+
+ /* Mask of valid flags */
+ BDRV_REQ_MASK = 0x7ff,
+} BdrvRequestFlags;
+
+#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
+#define BDRV_O_RDWR 0x0002
+#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
+#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save
+ writes in a snapshot */
+#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
+#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
+#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the
+ thread pool */
+#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
+#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
+#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
+#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */
+#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
+#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
+#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
+#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given:
+ select an appropriate protocol driver,
+ ignoring the format layer */
+#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
+#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening
+ read-write fails */
+#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
+
+#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
+
+
+/* Option names of options parsed by the block layer */
+
+#define BDRV_OPT_CACHE_WB "cache.writeback"
+#define BDRV_OPT_CACHE_DIRECT "cache.direct"
+#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
+#define BDRV_OPT_READ_ONLY "read-only"
+#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
+#define BDRV_OPT_DISCARD "discard"
+#define BDRV_OPT_FORCE_SHARE "force-share"
+
+
+#define BDRV_SECTOR_BITS 9
+#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
+
+#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
+ INT_MAX >> BDRV_SECTOR_BITS)
+#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
+
+/*
+ * We want to allow aligning requests and disk length up to any 32bit
+ * alignment without being afraid of overflow.
+ * To achieve this, and at the same time use a round number as the maximum disk
+ * size, define the maximum "length" (a limit for any offset/bytes request and
+ * for disk size) as INT64_MAX aligned down to BDRV_MAX_ALIGNMENT.
+ */
+#define BDRV_MAX_ALIGNMENT (1L << 30)
+#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
+
+/*
+ * Allocation status flags for bdrv_block_status() and friends.
+ *
+ * Public flags:
+ * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
+ * BDRV_BLOCK_ZERO: offset reads as zero
+ * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
+ * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
+ * layer rather than any backing, set by block layer
+ * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
+ * layer, set by block layer
+ *
+ * Internal flags:
+ * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
+ * that the block layer recompute the answer from the returned
+ * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
+ * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
+ * zeroes in file child of current block node inside
+ * returned region. Only valid together with both
+ * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
+ * appear with BDRV_BLOCK_ZERO.
+ *
+ * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
+ * host offset within the returned BDS that is allocated for the
+ * corresponding raw guest data. However, whether that offset
+ * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
+ *
+ * DATA ZERO OFFSET_VALID
+ * t t t sectors read as zero, returned file is zero at offset
+ * t f t sectors read as valid from file at offset
+ * f t t sectors preallocated, read as zero, returned file not
+ * necessarily zero at offset
+ * f f t sectors preallocated but read from backing_hd,
+ * returned file contains garbage at offset
+ * t t f sectors preallocated, read as zero, unknown offset
+ * t f f sectors read from unknown file or offset
+ * f t f not allocated or unknown offset, read as zero
+ * f f f not allocated or unknown offset, read from backing_hd
+ */
+#define BDRV_BLOCK_DATA 0x01
+#define BDRV_BLOCK_ZERO 0x02
+#define BDRV_BLOCK_OFFSET_VALID 0x04
+#define BDRV_BLOCK_RAW 0x08
+#define BDRV_BLOCK_ALLOCATED 0x10
+#define BDRV_BLOCK_EOF 0x20
+#define BDRV_BLOCK_RECURSE 0x40
+
+typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
+typedef struct BDRVReopenState {
+ BlockDriverState *bs;
+ int flags;
+ BlockdevDetectZeroesOptions detect_zeroes;
+ bool backing_missing;
+ BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
+ BlockDriverState *old_file_bs; /* keep pointer for permissions update */
+ QDict *options;
+ QDict *explicit_options;
+ void *opaque;
+} BDRVReopenState;
+
+/*
+ * Block operation types
+ */
+typedef enum BlockOpType {
+ BLOCK_OP_TYPE_BACKUP_SOURCE,
+ BLOCK_OP_TYPE_BACKUP_TARGET,
+ BLOCK_OP_TYPE_CHANGE,
+ BLOCK_OP_TYPE_COMMIT_SOURCE,
+ BLOCK_OP_TYPE_COMMIT_TARGET,
+ BLOCK_OP_TYPE_DATAPLANE,
+ BLOCK_OP_TYPE_DRIVE_DEL,
+ BLOCK_OP_TYPE_EJECT,
+ BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
+ BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
+ BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
+ BLOCK_OP_TYPE_MIRROR_SOURCE,
+ BLOCK_OP_TYPE_MIRROR_TARGET,
+ BLOCK_OP_TYPE_RESIZE,
+ BLOCK_OP_TYPE_STREAM,
+ BLOCK_OP_TYPE_REPLACE,
+ BLOCK_OP_TYPE_MAX,
+} BlockOpType;
+
+/* Block node permission constants */
+enum {
+ /**
+ * A user that has the "permission" of consistent reads is guaranteed that
+ * their view of the contents of the block device is complete and
+ * self-consistent, representing the contents of a disk at a specific
+ * point.
+ *
+ * For most block devices (including their backing files) this is true, but
+ * the property cannot be maintained in a few situations like for
+ * intermediate nodes of a commit block job.
+ */
+ BLK_PERM_CONSISTENT_READ = 0x01,
+
+ /** This permission is required to change the visible disk contents. */
+ BLK_PERM_WRITE = 0x02,
+
+ /**
+ * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
+ * required for writes to the block node when the caller promises that
+ * the visible disk content doesn't change.
+ *
+ * As the BLK_PERM_WRITE permission is strictly stronger, either is
+ * sufficient to perform an unchanging write.
+ */
+ BLK_PERM_WRITE_UNCHANGED = 0x04,
+
+ /** This permission is required to change the size of a block node. */
+ BLK_PERM_RESIZE = 0x08,
+
+ /**
+ * There used to be a bit BLK_PERM_GRAPH_MOD with value 0x10, which has since
+ * been removed. QEMU 6.1 and earlier may still lock the corresponding byte
+ * in block/file-posix locking, so any new permission must be implemented
+ * carefully to avoid interfering with this old, unused bit.
+ */
+
+ BLK_PERM_ALL = 0x0f,
+
+ DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
+ | BLK_PERM_WRITE
+ | BLK_PERM_WRITE_UNCHANGED
+ | BLK_PERM_RESIZE,
+
+ DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
+};
+
+/*
+ * Flags that parent nodes assign to child nodes to specify what kind of
+ * role(s) they take.
+ *
+ * At least one of DATA, METADATA, FILTERED, or COW must be set for
+ * every child.
+ */
+enum BdrvChildRoleBits {
+ /*
+ * This child stores data.
+ * Any node may have an arbitrary number of such children.
+ */
+ BDRV_CHILD_DATA = (1 << 0),
+
+ /*
+ * This child stores metadata.
+ * Any node may have an arbitrary number of metadata-storing
+ * children.
+ */
+ BDRV_CHILD_METADATA = (1 << 1),
+
+ /*
+ * A child that always presents exactly the same visible data as
+ * the parent, e.g. by virtue of the parent forwarding all reads
+ * and writes.
+ * This flag is mutually exclusive with DATA, METADATA, and COW.
+ * Any node may have at most one filtered child at a time.
+ */
+ BDRV_CHILD_FILTERED = (1 << 2),
+
+ /*
+ * Child from which to read all data that isn't allocated in the
+ * parent (i.e., the backing child); such data is copied to the
+ * parent through COW (and optionally COR).
+ * This flag is mutually exclusive with DATA, METADATA, and
+ * FILTERED.
+ * Any node may have at most one such backing child at a time.
+ */
+ BDRV_CHILD_COW = (1 << 3),
+
+ /*
+ * The primary child. For most drivers, this is the child whose
+ * filename applies best to the parent node.
+ * Any node may have at most one primary child at a time.
+ */
+ BDRV_CHILD_PRIMARY = (1 << 4),
+
+ /* Useful combination of flags */
+ BDRV_CHILD_IMAGE = BDRV_CHILD_DATA
+ | BDRV_CHILD_METADATA
+ | BDRV_CHILD_PRIMARY,
+};
+
+/* Mask of BdrvChildRoleBits values */
+typedef unsigned int BdrvChildRole;
+
+typedef struct BdrvCheckResult {
+ int corruptions;
+ int leaks;
+ int check_errors;
+ int corruptions_fixed;
+ int leaks_fixed;
+ int64_t image_end_offset;
+ BlockFragInfo bfi;
+} BdrvCheckResult;
+
+typedef enum {
+ BDRV_FIX_LEAKS = 1,
+ BDRV_FIX_ERRORS = 2,
+} BdrvCheckMode;
+
+typedef struct BlockSizes {
+ uint32_t phys;
+ uint32_t log;
+} BlockSizes;
+
+typedef struct HDGeometry {
+ uint32_t heads;
+ uint32_t sectors;
+ uint32_t cylinders;
+} HDGeometry;
+
+/*
+ * Common functions that are neither I/O nor Global State.
+ *
+ * These functions must never call any function from other categories
+ * (I/O, "I/O or GS", Global State) except this one, but can be invoked by
+ * all of them.
+ */
+
+char *bdrv_perm_names(uint64_t perm);
+uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
+
+void bdrv_init_with_whitelist(void);
+bool bdrv_uses_whitelist(void);
+int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
+
+int bdrv_parse_aio(const char *mode, int *flags);
+int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
+int bdrv_parse_discard_flags(const char *mode, int *flags);
+
+int path_has_protocol(const char *path);
+int path_is_absolute(const char *path);
+char *path_combine(const char *base_path, const char *filename);
+
+char *bdrv_get_full_backing_filename_from_filename(const char *backed,
+ const char *backing,
+ Error **errp);
+
+#endif /* BLOCK_COMMON_H */
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index 99370fa38b..68bbd344b2 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -25,6 +25,7 @@ typedef struct BlockCopyState BlockCopyState;
typedef struct BlockCopyCallState BlockCopyCallState;
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ const BdrvDirtyBitmap *bitmap,
Error **errp);
/* Function should be called prior any actual copy request */
@@ -34,6 +35,7 @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm);
void block_copy_state_free(BlockCopyState *s);
+void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes);
int64_t block_copy_reset_unallocated(BlockCopyState *s,
int64_t offset, int64_t *count);
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
new file mode 100644
index 0000000000..25bb69bbef
--- /dev/null
+++ b/include/block/block-global-state.h
@@ -0,0 +1,253 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_GLOBAL_STATE_H
+#define BLOCK_GLOBAL_STATE_H
+
+#include "block-common.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * If a function modifies the graph, it also uses drain and/or
+ * aio_context_acquire/release to be sure it has unique access.
+ * aio_context locking is needed together with BQL because of
+ * the thread-safe I/O API that concurrently runs and accesses
+ * the graph without the BQL.
+ *
+ * It is important to note that not all of these functions are
+ * necessarily limited to running under the BQL, but they would
+ * require additional auditing and many small thread-safety changes
+ * to move them into the I/O API. Often it's not worth doing that
+ * work since the APIs are only used with the BQL held at the
+ * moment, so they have been placed in the GS API (for now).
+ *
+ * These functions can call any function from this and other categories
+ * (I/O, "I/O or GS", Common), but must be invoked only by other GS APIs.
+ *
+ * All functions in this header must use the macro
+ * GLOBAL_STATE_CODE();
+ * to catch when they are accidentally called without the BQL.
+ */
+
+void bdrv_init(void);
+BlockDriver *bdrv_find_protocol(const char *filename,
+ bool allow_protocol_prefix,
+ Error **errp);
+BlockDriver *bdrv_find_format(const char *format_name);
+int bdrv_create(BlockDriver *drv, const char* filename,
+ QemuOpts *opts, Error **errp);
+int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
+
+BlockDriverState *bdrv_new(void);
+int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
+ Error **errp);
+int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
+ Error **errp);
+int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
+ Error **errp);
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+ int flags, Error **errp);
+int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
+
+BdrvChild *bdrv_open_child(const char *filename,
+ QDict *options, const char *bdref_key,
+ BlockDriverState *parent,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ bool allow_none, Error **errp);
+BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+ Error **errp);
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
+ const char *bdref_key, Error **errp);
+BlockDriverState *bdrv_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp);
+BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
+ const char *node_name,
+ QDict *options, int flags,
+ Error **errp);
+BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
+ int flags, Error **errp);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs, QDict *options,
+ bool keep_old_opts);
+void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
+ Error **errp);
+int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
+ Error **errp);
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+ const char *backing_file);
+void bdrv_refresh_filename(BlockDriverState *bs);
+void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp);
+int bdrv_commit(BlockDriverState *bs);
+int bdrv_make_empty(BdrvChild *c, Error **errp);
+int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
+ const char *backing_fmt, bool warn);
+void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
+ const char *backing_file_str);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+ BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);
+bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
+ Error **errp);
+int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
+ Error **errp);
+void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
+
+/*
+ * The units of offset and total_work_size may be chosen arbitrarily by the
+ * block driver; total_work_size may change during the course of the amendment
+ * operation.
+ */
+typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
+ int64_t total_work_size, void *opaque);
+int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ bool force,
+ Error **errp);
+
+/* check if a named node can be replaced when doing drive-mirror */
+BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
+ const char *node_name, Error **errp);
+
+int bdrv_activate(BlockDriverState *bs, Error **errp);
+void bdrv_activate_all(Error **errp);
+int bdrv_inactivate_all(void);
+
+int bdrv_flush_all(void);
+void bdrv_close_all(void);
+void bdrv_drain_all_begin(void);
+void bdrv_drain_all_end(void);
+void bdrv_drain_all(void);
+
+int bdrv_has_zero_init_1(BlockDriverState *bs);
+int bdrv_has_zero_init(BlockDriverState *bs);
+BlockDriverState *bdrv_find_node(const char *node_name);
+BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp);
+XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp);
+BlockDriverState *bdrv_lookup_bs(const char *device,
+ const char *node_name,
+ Error **errp);
+bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
+BlockDriverState *bdrv_next_node(BlockDriverState *bs);
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
+
+typedef struct BdrvNextIterator {
+ enum {
+ BDRV_NEXT_BACKEND_ROOTS,
+ BDRV_NEXT_MONITOR_OWNED,
+ } phase;
+ BlockBackend *blk;
+ BlockDriverState *bs;
+} BdrvNextIterator;
+
+BlockDriverState *bdrv_first(BdrvNextIterator *it);
+BlockDriverState *bdrv_next(BdrvNextIterator *it);
+void bdrv_next_cleanup(BdrvNextIterator *it);
+
+BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+ void *opaque, bool read_only);
+int bdrv_get_flags(BlockDriverState *bs);
+char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
+char *bdrv_dirname(BlockDriverState *bs, Error **errp);
+
+void bdrv_img_create(const char *filename, const char *fmt,
+ const char *base_filename, const char *base_fmt,
+ char *options, uint64_t img_size, int flags,
+ bool quiet, Error **errp);
+
+void bdrv_ref(BlockDriverState *bs);
+void bdrv_unref(BlockDriverState *bs);
+void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
+BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
+ BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ Error **errp);
+
+bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
+void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
+void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
+bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+ const char *tag);
+int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
+
+/**
+ * Locks the AioContext of @bs if it's not the current AioContext. This avoids
+ * double locking which could lead to deadlocks: This is a coroutine_fn, so we
+ * know we already own the lock of the current AioContext.
+ *
+ * May only be called in the main thread.
+ */
+void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
+
+/**
+ * Unlocks the AioContext of @bs if it's not the current AioContext.
+ */
+void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
+
+void bdrv_set_aio_context_ignore(BlockDriverState *bs,
+ AioContext *new_context, GSList **ignore);
+int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
+ Error **errp);
+int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
+ BdrvChild *ignore_child, Error **errp);
+bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
+ GSList **ignore, Error **errp);
+bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
+ GSList **ignore, Error **errp);
+AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
+
+int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
+int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
+
+void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
+ Error **errp);
+void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
+
+/**
+ *
+ * bdrv_register_buf/bdrv_unregister_buf:
+ *
+ * Register/unregister a buffer for I/O. For example, VFIO drivers are
+ * interested to know the memory areas that would later be used for I/O, so
+ * that they can prepare IOMMU mapping etc., to get better performance.
+ */
+void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
+void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+
+void bdrv_cancel_in_flight(BlockDriverState *bs);
+
+#endif /* BLOCK_GLOBAL_STATE_H */
diff --git a/include/block/block-io.h b/include/block/block-io.h
new file mode 100644
index 0000000000..5e3f346806
--- /dev/null
+++ b/include/block/block-io.h
@@ -0,0 +1,368 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_IO_H
+#define BLOCK_IO_H
+
+#include "block-common.h"
+
+/*
+ * I/O API functions. These functions are thread-safe, and therefore
+ * can run in any thread as long as the thread has called
+ * aio_context_acquire/release().
+ *
+ * These functions can only call functions from I/O and Common categories,
+ * but can be invoked by GS, "I/O or GS" and I/O APIs.
+ *
+ * All functions in this category must use the macro
+ * IO_CODE();
+ * to catch when they are accidentally called by the wrong API.
+ */
+
+int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags);
+int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
+int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes);
+int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
+ int64_t bytes);
+int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
+ const void *buf, int64_t bytes);
+/*
+ * Efficiently zero a region of the disk image. Note that this is a regular
+ * I/O request like read or write and should have a reasonable size. This
+ * function is not suitable for zeroing the entire image in a single request
+ * because it may allocate memory for the entire region.
+ */
+int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags);
+
+int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags,
+ Error **errp);
+
+int64_t bdrv_nb_sectors(BlockDriverState *bs);
+int64_t bdrv_getlength(BlockDriverState *bs);
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
+BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
+ BlockDriverState *in_bs, Error **errp);
+void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
+int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
+void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs);
+
+
+/* async block I/O */
+void bdrv_aio_cancel(BlockAIOCB *acb);
+void bdrv_aio_cancel_async(BlockAIOCB *acb);
+
+/* sg packet commands */
+int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
+
+/* Ensure contents are flushed to disk. */
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
+
+int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
+bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
+int bdrv_block_status(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, int64_t *pnum, int64_t *map,
+ BlockDriverState **file);
+int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
+ int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
+int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ int64_t *pnum);
+int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+ bool include_base, int64_t offset, int64_t bytes,
+ int64_t *pnum);
+int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
+ int64_t bytes);
+
+int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
+ bool ignore_allow_rdw, Error **errp);
+int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
+ Error **errp);
+bool bdrv_is_read_only(BlockDriverState *bs);
+bool bdrv_is_writable(BlockDriverState *bs);
+bool bdrv_is_sg(BlockDriverState *bs);
+bool bdrv_is_inserted(BlockDriverState *bs);
+void bdrv_lock_medium(BlockDriverState *bs, bool locked);
+void bdrv_eject(BlockDriverState *bs, bool eject_flag);
+const char *bdrv_get_format_name(BlockDriverState *bs);
+
+bool bdrv_supports_compressed_writes(BlockDriverState *bs);
+const char *bdrv_get_node_name(const BlockDriverState *bs);
+const char *bdrv_get_device_name(const BlockDriverState *bs);
+const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
+int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
+ Error **errp);
+BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ int64_t *cluster_offset,
+ int64_t *cluster_bytes);
+
+void bdrv_get_backing_filename(BlockDriverState *bs,
+ char *filename, int filename_size);
+
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size);
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+/*
+ * Returns the alignment in bytes that is required so that no bounce buffer
+ * is required throughout the stack
+ */
+size_t bdrv_min_mem_align(BlockDriverState *bs);
+/* Returns optimal alignment in bytes for bounce buffer */
+size_t bdrv_opt_mem_align(BlockDriverState *bs);
+void *qemu_blockalign(BlockDriverState *bs, size_t size);
+void *qemu_blockalign0(BlockDriverState *bs, size_t size);
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
+void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
+
+void bdrv_enable_copy_on_read(BlockDriverState *bs);
+void bdrv_disable_copy_on_read(BlockDriverState *bs);
+
+void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
+
+#define BLKDBG_EVENT(child, evt) \
+ do { \
+ if (child) { \
+ bdrv_debug_event(child->bs, evt); \
+ } \
+ } while (0)
+
+/**
+ * bdrv_get_aio_context:
+ *
+ * Returns: the currently bound #AioContext
+ */
+AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+
+/**
+ * Move the current coroutine to the AioContext of @bs and return the old
+ * AioContext of the coroutine. Increase bs->in_flight so that draining @bs
+ * will wait for the operation to proceed until the corresponding
+ * bdrv_co_leave().
+ *
+ * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as
+ * this will deadlock.
+ */
+AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs);
+
+/**
+ * Ends a section started by bdrv_co_enter(). Move the current coroutine back
+ * to old_ctx and decrease bs->in_flight again.
+ */
+void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
+
+/**
+ * Transfer control to @co in the aio context of @bs
+ */
+void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
+
+AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
+
+void bdrv_io_plug(BlockDriverState *bs);
+void bdrv_io_unplug(BlockDriverState *bs);
+
+bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+ uint32_t granularity, Error **errp);
+
+/**
+ *
+ * bdrv_co_copy_range:
+ *
+ * Do offloaded copy between two children. If the operation is not implemented
+ * by the driver, or if the backend storage doesn't support it, a negative
+ * error code will be returned.
+ *
+ * Note: block layer doesn't emulate or fallback to a bounce buffer approach
+ * because usually the caller shouldn't attempt offloaded copy any more (e.g.
+ * calling copy_file_range(2)) after the first error, thus it should fall back
+ * to a read+write path in the caller level.
+ *
+ * @src: Source child to copy data from
+ * @src_offset: offset in @src image to read data
+ * @dst: Destination child to copy data to
+ * @dst_offset: offset in @dst image to write data
+ * @bytes: number of bytes to copy
+ * @read_flags/@write_flags: request flags. Supported flags:
+ * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
+ * write on @dst as if bdrv_co_pwrite_zeroes is
+ * called. Used to simplify caller code, or
+ * during BlockDriver.bdrv_co_copy_range_from()
+ * recursion.
+ * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
+ * requests currently in flight.
+ *
+ * Returns: 0 if succeeded; negative error code if failed.
+ **/
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+/**
+ * bdrv_drained_end_no_poll:
+ *
+ * Same as bdrv_drained_end(), but do not poll for the subgraph to
+ * actually become unquiesced. Therefore, no graph changes will occur
+ * with this function.
+ *
+ * *drained_end_counter is incremented for every background operation
+ * that is scheduled, and will be decremented for every operation once
+ * it settles. The caller must poll until it reaches 0. The counter
+ * should be accessed using atomic operations only.
+ */
+void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
+
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which
+ * requires the caller to be either in the main thread and hold
+ * the BlockDriverState (bs) AioContext lock, or directly in the
+ * home thread that runs the bs AioContext. Calling them from
+ * another thread in another AioContext would cause deadlocks.
+ *
+ * Therefore, these functions are not proper I/O, because they
+ * can't run in *any* iothreads, but only in a specific one.
+ *
+ * These functions can call any function from the I/O, Common and this
+ * category, but must be invoked only by other "I/O or GS" and GS APIs.
+ *
+ * All functions in this category must use the macro
+ * IO_OR_GS_CODE();
+ * to catch when they are accidentally called by the wrong API.
+ */
+
+#define BDRV_POLL_WHILE(bs, cond) ({ \
+ BlockDriverState *bs_ = (bs); \
+ IO_OR_GS_CODE(); \
+ AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
+ cond); })
+
+void bdrv_drain(BlockDriverState *bs);
+void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
+
+int generated_co_wrapper
+bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+
+int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix);
+
+/* Invalidate any cached metadata used by image formats */
+int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
+ Error **errp);
+int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
+int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
+ int64_t bytes);
+int generated_co_wrapper
+bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+int generated_co_wrapper
+bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+
+/**
+ * bdrv_parent_drained_begin_single:
+ *
+ * Begin a quiesced section for the parent of @c. If @poll is true, wait for
+ * any pending activity to cease.
+ */
+void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
+
+/**
+ * bdrv_parent_drained_end_single:
+ *
+ * End a quiesced section for the parent of @c.
+ *
+ * This polls @bs's AioContext until all scheduled sub-drained_ends
+ * have settled, which may result in graph changes.
+ */
+void bdrv_parent_drained_end_single(BdrvChild *c);
+
+/**
+ * bdrv_drain_poll:
+ *
+ * Poll for pending requests in @bs, its parents (except for @ignore_parent),
+ * and if @recursive is true its children as well (used for subtree drain).
+ *
+ * If @ignore_bds_parents is true, parents that are BlockDriverStates must
+ * ignore the drain request because they will be drained separately (used for
+ * drain_all).
+ *
+ * This is part of bdrv_drained_begin.
+ */
+bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
+ BdrvChild *ignore_parent, bool ignore_bds_parents);
+
+/**
+ * bdrv_drained_begin:
+ *
+ * Begin a quiesced section for exclusive access to the BDS, by disabling
+ * external request sources including NBD server, block jobs, and device model.
+ *
+ * This function can be recursive.
+ */
+void bdrv_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_do_drained_begin_quiesce:
+ *
+ * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
+ * running requests to complete.
+ */
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+ BdrvChild *parent, bool ignore_bds_parents);
+
+/**
+ * Like bdrv_drained_begin, but recursively begins a quiesced section for
+ * exclusive access to all child nodes as well.
+ */
+void bdrv_subtree_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_drained_end:
+ *
+ * End a quiescent section started by bdrv_drained_begin().
+ *
+ * This polls @bs's AioContext until all scheduled sub-drained_ends
+ * have settled. On one hand, that may result in graph changes. On
+ * the other, this requires that the caller either runs in the main
+ * loop; or that all involved nodes (@bs and all of its parents) are
+ * in the caller's AioContext.
+ */
+void bdrv_drained_end(BlockDriverState *bs);
+
+/**
+ * End a quiescent section started by bdrv_subtree_drained_begin().
+ */
+void bdrv_subtree_drained_end(BlockDriverState *bs);
+
+#endif /* BLOCK_IO_H */
diff --git a/include/block/block.h b/include/block/block.h
index e1713ee306..1e6b8fef1e 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -1,864 +1,32 @@
-#ifndef BLOCK_H
-#define BLOCK_H
-
-#include "block/aio.h"
-#include "block/aio-wait.h"
-#include "qemu/iov.h"
-#include "qemu/coroutine.h"
-#include "block/accounting.h"
-#include "block/dirty-bitmap.h"
-#include "block/blockjob.h"
-#include "qemu/hbitmap.h"
-#include "qemu/transactions.h"
-
/*
- * generated_co_wrapper
- *
- * Function specifier, which does nothing but mark functions to be
- * generated by scripts/block-coroutine-wrapper.py
- *
- * Read more in docs/devel/block-coroutine-wrapper.rst
- */
-#define generated_co_wrapper
-
-/* block.c */
-typedef struct BlockDriver BlockDriver;
-typedef struct BdrvChild BdrvChild;
-typedef struct BdrvChildClass BdrvChildClass;
-
-typedef struct BlockDriverInfo {
- /* in bytes, 0 if irrelevant */
- int cluster_size;
- /* offset at which the VM state can be saved (0 if not possible) */
- int64_t vm_state_offset;
- bool is_dirty;
- /*
- * True if this block driver only supports compressed writes
- */
- bool needs_compressed_writes;
-} BlockDriverInfo;
-
-typedef struct BlockFragInfo {
- uint64_t allocated_clusters;
- uint64_t total_clusters;
- uint64_t fragmented_clusters;
- uint64_t compressed_clusters;
-} BlockFragInfo;
-
-typedef enum {
- BDRV_REQ_COPY_ON_READ = 0x1,
- BDRV_REQ_ZERO_WRITE = 0x2,
-
- /*
- * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
- * that the block driver should unmap (discard) blocks if it is guaranteed
- * that the result will read back as zeroes. The flag is only passed to the
- * driver if the block device is opened with BDRV_O_UNMAP.
- */
- BDRV_REQ_MAY_UNMAP = 0x4,
-
- BDRV_REQ_FUA = 0x10,
- BDRV_REQ_WRITE_COMPRESSED = 0x20,
-
- /* Signifies that this write request will not change the visible disk
- * content. */
- BDRV_REQ_WRITE_UNCHANGED = 0x40,
-
- /* Forces request serialisation. Use only with write requests. */
- BDRV_REQ_SERIALISING = 0x80,
-
- /* Execute the request only if the operation can be offloaded or otherwise
- * be executed efficiently, but return an error instead of using a slow
- * fallback. */
- BDRV_REQ_NO_FALLBACK = 0x100,
-
- /*
- * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
- * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
- * filter is involved), in which case it signals that the COR operation
- * need not read the data into memory (qiov) but only ensure they are
- * copied to the top layer (i.e., that COR operation is done).
- */
- BDRV_REQ_PREFETCH = 0x200,
-
- /*
- * If we need to wait for other requests, just fail immediately. Used
- * only together with BDRV_REQ_SERIALISING.
- */
- BDRV_REQ_NO_WAIT = 0x400,
-
- /* Mask of valid flags */
- BDRV_REQ_MASK = 0x7ff,
-} BdrvRequestFlags;
-
-typedef struct BlockSizes {
- uint32_t phys;
- uint32_t log;
-} BlockSizes;
-
-typedef struct HDGeometry {
- uint32_t heads;
- uint32_t sectors;
- uint32_t cylinders;
-} HDGeometry;
-
-#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
-#define BDRV_O_RDWR 0x0002
-#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
-#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
-#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
-#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
-#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
-#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
-#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
-#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
-#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */
-#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
-#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
-#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
-#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given:
- select an appropriate protocol driver,
- ignoring the format layer */
-#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
-#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening read-write fails */
-#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
-
-#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
-
-
-/* Option names of options parsed by the block layer */
-
-#define BDRV_OPT_CACHE_WB "cache.writeback"
-#define BDRV_OPT_CACHE_DIRECT "cache.direct"
-#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
-#define BDRV_OPT_READ_ONLY "read-only"
-#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
-#define BDRV_OPT_DISCARD "discard"
-#define BDRV_OPT_FORCE_SHARE "force-share"
-
-
-#define BDRV_SECTOR_BITS 9
-#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
-
-#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
- INT_MAX >> BDRV_SECTOR_BITS)
-#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
-
-/*
- * We want allow aligning requests and disk length up to any 32bit alignment
- * and don't afraid of overflow.
- * To achieve it, and in the same time use some pretty number as maximum disk
- * size, let's define maximum "length" (a limit for any offset/bytes request and
- * for disk size) to be the greatest power of 2 less than INT64_MAX.
- */
-#define BDRV_MAX_ALIGNMENT (1L << 30)
-#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
-
-/*
- * Allocation status flags for bdrv_block_status() and friends.
- *
- * Public flags:
- * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
- * BDRV_BLOCK_ZERO: offset reads as zero
- * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
- * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
- * layer rather than any backing, set by block layer
- * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
- * layer, set by block layer
- *
- * Internal flags:
- * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
- * that the block layer recompute the answer from the returned
- * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
- * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
- * zeroes in file child of current block node inside
- * returned region. Only valid together with both
- * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
- * appear with BDRV_BLOCK_ZERO.
- *
- * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
- * host offset within the returned BDS that is allocated for the
- * corresponding raw guest data. However, whether that offset
- * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
- *
- * DATA ZERO OFFSET_VALID
- * t t t sectors read as zero, returned file is zero at offset
- * t f t sectors read as valid from file at offset
- * f t t sectors preallocated, read as zero, returned file not
- * necessarily zero at offset
- * f f t sectors preallocated but read from backing_hd,
- * returned file contains garbage at offset
- * t t f sectors preallocated, read as zero, unknown offset
- * t f f sectors read from unknown file or offset
- * f t f not allocated or unknown offset, read as zero
- * f f f not allocated or unknown offset, read from backing_hd
- */
-#define BDRV_BLOCK_DATA 0x01
-#define BDRV_BLOCK_ZERO 0x02
-#define BDRV_BLOCK_OFFSET_VALID 0x04
-#define BDRV_BLOCK_RAW 0x08
-#define BDRV_BLOCK_ALLOCATED 0x10
-#define BDRV_BLOCK_EOF 0x20
-#define BDRV_BLOCK_RECURSE 0x40
-
-typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
-
-typedef struct BDRVReopenState {
- BlockDriverState *bs;
- int flags;
- BlockdevDetectZeroesOptions detect_zeroes;
- bool backing_missing;
- BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
- BlockDriverState *old_file_bs; /* keep pointer for permissions update */
- QDict *options;
- QDict *explicit_options;
- void *opaque;
-} BDRVReopenState;
-
-/*
- * Block operation types
- */
-typedef enum BlockOpType {
- BLOCK_OP_TYPE_BACKUP_SOURCE,
- BLOCK_OP_TYPE_BACKUP_TARGET,
- BLOCK_OP_TYPE_CHANGE,
- BLOCK_OP_TYPE_COMMIT_SOURCE,
- BLOCK_OP_TYPE_COMMIT_TARGET,
- BLOCK_OP_TYPE_DATAPLANE,
- BLOCK_OP_TYPE_DRIVE_DEL,
- BLOCK_OP_TYPE_EJECT,
- BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
- BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
- BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
- BLOCK_OP_TYPE_MIRROR_SOURCE,
- BLOCK_OP_TYPE_MIRROR_TARGET,
- BLOCK_OP_TYPE_RESIZE,
- BLOCK_OP_TYPE_STREAM,
- BLOCK_OP_TYPE_REPLACE,
- BLOCK_OP_TYPE_MAX,
-} BlockOpType;
-
-/* Block node permission constants */
-enum {
- /**
- * A user that has the "permission" of consistent reads is guaranteed that
- * their view of the contents of the block device is complete and
- * self-consistent, representing the contents of a disk at a specific
- * point.
- *
- * For most block devices (including their backing files) this is true, but
- * the property cannot be maintained in a few situations like for
- * intermediate nodes of a commit block job.
- */
- BLK_PERM_CONSISTENT_READ = 0x01,
-
- /** This permission is required to change the visible disk contents. */
- BLK_PERM_WRITE = 0x02,
-
- /**
- * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
- * required for writes to the block node when the caller promises that
- * the visible disk content doesn't change.
- *
- * As the BLK_PERM_WRITE permission is strictly stronger, either is
- * sufficient to perform an unchanging write.
- */
- BLK_PERM_WRITE_UNCHANGED = 0x04,
-
- /** This permission is required to change the size of a block node. */
- BLK_PERM_RESIZE = 0x08,
-
- /**
- * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU
- * 6.1 and earlier may still lock the corresponding byte in block/file-posix
- * locking. So, implementing some new permission should be very careful to
- * not interfere with this old unused thing.
- */
-
- BLK_PERM_ALL = 0x0f,
-
- DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
- | BLK_PERM_WRITE
- | BLK_PERM_WRITE_UNCHANGED
- | BLK_PERM_RESIZE,
-
- DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
-};
-
-/*
- * Flags that parent nodes assign to child nodes to specify what kind of
- * role(s) they take.
- *
- * At least one of DATA, METADATA, FILTERED, or COW must be set for
- * every child.
- */
-enum BdrvChildRoleBits {
- /*
- * This child stores data.
- * Any node may have an arbitrary number of such children.
- */
- BDRV_CHILD_DATA = (1 << 0),
-
- /*
- * This child stores metadata.
- * Any node may have an arbitrary number of metadata-storing
- * children.
- */
- BDRV_CHILD_METADATA = (1 << 1),
-
- /*
- * A child that always presents exactly the same visible data as
- * the parent, e.g. by virtue of the parent forwarding all reads
- * and writes.
- * This flag is mutually exclusive with DATA, METADATA, and COW.
- * Any node may have at most one filtered child at a time.
- */
- BDRV_CHILD_FILTERED = (1 << 2),
-
- /*
- * Child from which to read all data that isn't allocated in the
- * parent (i.e., the backing child); such data is copied to the
- * parent through COW (and optionally COR).
- * This field is mutually exclusive with DATA, METADATA, and
- * FILTERED.
- * Any node may have at most one such backing child at a time.
- */
- BDRV_CHILD_COW = (1 << 3),
-
- /*
- * The primary child. For most drivers, this is the child whose
- * filename applies best to the parent node.
- * Any node may have at most one primary child at a time.
- */
- BDRV_CHILD_PRIMARY = (1 << 4),
-
- /* Useful combination of flags */
- BDRV_CHILD_IMAGE = BDRV_CHILD_DATA
- | BDRV_CHILD_METADATA
- | BDRV_CHILD_PRIMARY,
-};
-
-/* Mask of BdrvChildRoleBits values */
-typedef unsigned int BdrvChildRole;
-
-char *bdrv_perm_names(uint64_t perm);
-uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
-
-void bdrv_init(void);
-void bdrv_init_with_whitelist(void);
-bool bdrv_uses_whitelist(void);
-int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
-BlockDriver *bdrv_find_protocol(const char *filename,
- bool allow_protocol_prefix,
- Error **errp);
-BlockDriver *bdrv_find_format(const char *format_name);
-int bdrv_create(BlockDriver *drv, const char* filename,
- QemuOpts *opts, Error **errp);
-int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
-
-BlockDriverState *bdrv_new(void);
-int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
- Error **errp);
-int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
- Error **errp);
-int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
- Error **errp);
-BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
- int flags, Error **errp);
-int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
-
-int bdrv_parse_aio(const char *mode, int *flags);
-int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
-int bdrv_parse_discard_flags(const char *mode, int *flags);
-BdrvChild *bdrv_open_child(const char *filename,
- QDict *options, const char *bdref_key,
- BlockDriverState* parent,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- bool allow_none, Error **errp);
-BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
- Error **errp);
-int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
- const char *bdref_key, Error **errp);
-BlockDriverState *bdrv_open(const char *filename, const char *reference,
- QDict *options, int flags, Error **errp);
-BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
- const char *node_name,
- QDict *options, int flags,
- Error **errp);
-BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
- int flags, Error **errp);
-BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
- BlockDriverState *bs, QDict *options,
- bool keep_old_opts);
-void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue);
-int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
-int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
- Error **errp);
-int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
- Error **errp);
-int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
-int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
-int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes);
-int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
- int64_t bytes);
-int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
- const void *buf, int64_t bytes);
-/*
- * Efficiently zero a region of the disk image. Note that this is a regular
- * I/O request like read or write and should have a reasonable size. This
- * function is not suitable for zeroing the entire image in a single request
- * because it may allocate memory for the entire region.
- */
-int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
-BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
- const char *backing_file);
-void bdrv_refresh_filename(BlockDriverState *bs);
-
-int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags,
- Error **errp);
-int generated_co_wrapper
-bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
-
-int64_t bdrv_nb_sectors(BlockDriverState *bs);
-int64_t bdrv_getlength(BlockDriverState *bs);
-int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
-BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
- BlockDriverState *in_bs, Error **errp);
-void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
-void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp);
-int bdrv_commit(BlockDriverState *bs);
-int bdrv_make_empty(BdrvChild *c, Error **errp);
-int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
- const char *backing_fmt, bool warn);
-void bdrv_register(BlockDriver *bdrv);
-int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
- const char *backing_file_str);
-BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
- BlockDriverState *bs);
-BlockDriverState *bdrv_find_base(BlockDriverState *bs);
-bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
- Error **errp);
-int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
- Error **errp);
-void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
-int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
-void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs);
-
-
-typedef struct BdrvCheckResult {
- int corruptions;
- int leaks;
- int check_errors;
- int corruptions_fixed;
- int leaks_fixed;
- int64_t image_end_offset;
- BlockFragInfo bfi;
-} BdrvCheckResult;
-
-typedef enum {
- BDRV_FIX_LEAKS = 1,
- BDRV_FIX_ERRORS = 2,
-} BdrvCheckMode;
-
-int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix);
-
-/* The units of offset and total_work_size may be chosen arbitrarily by the
- * block driver; total_work_size may change during the course of the amendment
- * operation */
-typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
- int64_t total_work_size, void *opaque);
-int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
- bool force,
- Error **errp);
-
-/* check if a named node can be replaced when doing drive-mirror */
-BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
- const char *node_name, Error **errp);
-
-/* async block I/O */
-void bdrv_aio_cancel(BlockAIOCB *acb);
-void bdrv_aio_cancel_async(BlockAIOCB *acb);
-
-/* sg packet commands */
-int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
-
-/* Invalidate any cached metadata used by image formats */
-int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
- Error **errp);
-void bdrv_invalidate_cache_all(Error **errp);
-int bdrv_inactivate_all(void);
-
-/* Ensure contents are flushed to disk. */
-int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
-int bdrv_flush_all(void);
-void bdrv_close_all(void);
-void bdrv_drain(BlockDriverState *bs);
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
-void bdrv_drain_all_begin(void);
-void bdrv_drain_all_end(void);
-void bdrv_drain_all(void);
-
-#define BDRV_POLL_WHILE(bs, cond) ({ \
- BlockDriverState *bs_ = (bs); \
- AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
- cond); })
-
-int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
- int64_t bytes);
-int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
-int bdrv_has_zero_init_1(BlockDriverState *bs);
-int bdrv_has_zero_init(BlockDriverState *bs);
-bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
-int bdrv_block_status(BlockDriverState *bs, int64_t offset,
- int64_t bytes, int64_t *pnum, int64_t *map,
- BlockDriverState **file);
-int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
- int64_t offset, int64_t bytes, int64_t *pnum,
- int64_t *map, BlockDriverState **file);
-int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
- int64_t *pnum);
-int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
- bool include_base, int64_t offset, int64_t bytes,
- int64_t *pnum);
-int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
- int64_t bytes);
-
-bool bdrv_is_read_only(BlockDriverState *bs);
-int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
- bool ignore_allow_rdw, Error **errp);
-int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
- Error **errp);
-bool bdrv_is_writable(BlockDriverState *bs);
-bool bdrv_is_sg(BlockDriverState *bs);
-bool bdrv_is_inserted(BlockDriverState *bs);
-void bdrv_lock_medium(BlockDriverState *bs, bool locked);
-void bdrv_eject(BlockDriverState *bs, bool eject_flag);
-const char *bdrv_get_format_name(BlockDriverState *bs);
-BlockDriverState *bdrv_find_node(const char *node_name);
-BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp);
-XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp);
-BlockDriverState *bdrv_lookup_bs(const char *device,
- const char *node_name,
- Error **errp);
-bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
-BlockDriverState *bdrv_next_node(BlockDriverState *bs);
-BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
-
-typedef struct BdrvNextIterator {
- enum {
- BDRV_NEXT_BACKEND_ROOTS,
- BDRV_NEXT_MONITOR_OWNED,
- } phase;
- BlockBackend *blk;
- BlockDriverState *bs;
-} BdrvNextIterator;
-
-BlockDriverState *bdrv_first(BdrvNextIterator *it);
-BlockDriverState *bdrv_next(BdrvNextIterator *it);
-void bdrv_next_cleanup(BdrvNextIterator *it);
-
-BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
-bool bdrv_supports_compressed_writes(BlockDriverState *bs);
-void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
- void *opaque, bool read_only);
-const char *bdrv_get_node_name(const BlockDriverState *bs);
-const char *bdrv_get_device_name(const BlockDriverState *bs);
-const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
-int bdrv_get_flags(BlockDriverState *bs);
-int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
-ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
- Error **errp);
-BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- int64_t *cluster_offset,
- int64_t *cluster_bytes);
-
-void bdrv_get_backing_filename(BlockDriverState *bs,
- char *filename, int filename_size);
-char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
-char *bdrv_get_full_backing_filename_from_filename(const char *backed,
- const char *backing,
- Error **errp);
-char *bdrv_dirname(BlockDriverState *bs, Error **errp);
-
-int path_has_protocol(const char *path);
-int path_is_absolute(const char *path);
-char *path_combine(const char *base_path, const char *filename);
-
-int generated_co_wrapper
-bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int generated_co_wrapper
-bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
- int64_t pos, int size);
-
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size);
-
-void bdrv_img_create(const char *filename, const char *fmt,
- const char *base_filename, const char *base_fmt,
- char *options, uint64_t img_size, int flags,
- bool quiet, Error **errp);
-
-/* Returns the alignment in bytes that is required so that no bounce buffer
- * is required throughout the stack */
-size_t bdrv_min_mem_align(BlockDriverState *bs);
-/* Returns optimal alignment in bytes for bounce buffer */
-size_t bdrv_opt_mem_align(BlockDriverState *bs);
-void *qemu_blockalign(BlockDriverState *bs, size_t size);
-void *qemu_blockalign0(BlockDriverState *bs, size_t size);
-void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
-void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
-
-void bdrv_enable_copy_on_read(BlockDriverState *bs);
-void bdrv_disable_copy_on_read(BlockDriverState *bs);
-
-void bdrv_ref(BlockDriverState *bs);
-void bdrv_unref(BlockDriverState *bs);
-void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
-BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
- BlockDriverState *child_bs,
- const char *child_name,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- Error **errp);
-
-bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
-void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
-void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
-void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
-void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
-bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
-
-#define BLKDBG_EVENT(child, evt) \
- do { \
- if (child) { \
- bdrv_debug_event(child->bs, evt); \
- } \
- } while (0)
-
-void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
-
-int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
- const char *tag);
-int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
-int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
-bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
-
-/**
- * bdrv_get_aio_context:
+ * QEMU System Emulator block driver
*
- * Returns: the currently bound #AioContext
- */
-AioContext *bdrv_get_aio_context(BlockDriverState *bs);
-
-/**
- * Move the current coroutine to the AioContext of @bs and return the old
- * AioContext of the coroutine. Increase bs->in_flight so that draining @bs
- * will wait for the operation to proceed until the corresponding
- * bdrv_co_leave().
+ * Copyright (c) 2003 Fabrice Bellard
*
- * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as
- * this will deadlock.
- */
-AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs);
-
-/**
- * Ends a section started by bdrv_co_enter(). Move the current coroutine back
- * to old_ctx and decrease bs->in_flight again.
- */
-void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
-
-/**
- * Locks the AioContext of @bs if it's not the current AioContext. This avoids
- * double locking which could lead to deadlocks: This is a coroutine_fn, so we
- * know we already own the lock of the current AioContext.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
*
- * May only be called in the main thread.
- */
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
-
-/**
- * Unlocks the AioContext of @bs if it's not the current AioContext.
- */
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
-
-/**
- * Transfer control to @co in the aio context of @bs
- */
-void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
-
-void bdrv_set_aio_context_ignore(BlockDriverState *bs,
- AioContext *new_context, GSList **ignore);
-int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
- Error **errp);
-int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
- BdrvChild *ignore_child, Error **errp);
-bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
- GSList **ignore, Error **errp);
-bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
- GSList **ignore, Error **errp);
-AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
-AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
-
-int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
-int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
-
-void bdrv_io_plug(BlockDriverState *bs);
-void bdrv_io_unplug(BlockDriverState *bs);
-
-/**
- * bdrv_parent_drained_begin_single:
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
*
- * Begin a quiesced section for the parent of @c. If @poll is true, wait for
- * any pending activity to cease.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
*/
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
-
-/**
- * bdrv_parent_drained_end_single:
- *
- * End a quiesced section for the parent of @c.
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled, which may result in graph changes.
- */
-void bdrv_parent_drained_end_single(BdrvChild *c);
-
-/**
- * bdrv_drain_poll:
- *
- * Poll for pending requests in @bs, its parents (except for @ignore_parent),
- * and if @recursive is true its children as well (used for subtree drain).
- *
- * If @ignore_bds_parents is true, parents that are BlockDriverStates must
- * ignore the drain request because they will be drained separately (used for
- * drain_all).
- *
- * This is part of bdrv_drained_begin.
- */
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
- BdrvChild *ignore_parent, bool ignore_bds_parents);
-
-/**
- * bdrv_drained_begin:
- *
- * Begin a quiesced section for exclusive access to the BDS, by disabling
- * external request sources including NBD server, block jobs, and device model.
- *
- * This function can be recursive.
- */
-void bdrv_drained_begin(BlockDriverState *bs);
-
-/**
- * bdrv_do_drained_begin_quiesce:
- *
- * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
- * running requests to complete.
- */
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
- BdrvChild *parent, bool ignore_bds_parents);
-
-/**
- * Like bdrv_drained_begin, but recursively begins a quiesced section for
- * exclusive access to all child nodes as well.
- */
-void bdrv_subtree_drained_begin(BlockDriverState *bs);
-
-/**
- * bdrv_drained_end:
- *
- * End a quiescent section started by bdrv_drained_begin().
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled. On one hand, that may result in graph changes. On
- * the other, this requires that the caller either runs in the main
- * loop; or that all involved nodes (@bs and all of its parents) are
- * in the caller's AioContext.
- */
-void bdrv_drained_end(BlockDriverState *bs);
-
-/**
- * bdrv_drained_end_no_poll:
- *
- * Same as bdrv_drained_end(), but do not poll for the subgraph to
- * actually become unquiesced. Therefore, no graph changes will occur
- * with this function.
- *
- * *drained_end_counter is incremented for every background operation
- * that is scheduled, and will be decremented for every operation once
- * it settles. The caller must poll until it reaches 0. The counter
- * should be accessed using atomic operations only.
- */
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
-
-/**
- * End a quiescent section started by bdrv_subtree_drained_begin().
- */
-void bdrv_subtree_drained_end(BlockDriverState *bs);
-
-void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
- Error **errp);
-void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
-
-bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
- uint32_t granularity, Error **errp);
-/**
- *
- * bdrv_register_buf/bdrv_unregister_buf:
- *
- * Register/unregister a buffer for I/O. For example, VFIO drivers are
- * interested to know the memory areas that would later be used for I/O, so
- * that they can prepare IOMMU mapping etc., to get better performance.
- */
-void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
-void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+#ifndef BLOCK_H
+#define BLOCK_H
-/**
- *
- * bdrv_co_copy_range:
- *
- * Do offloaded copy between two children. If the operation is not implemented
- * by the driver, or if the backend storage doesn't support it, a negative
- * error code will be returned.
- *
- * Note: block layer doesn't emulate or fallback to a bounce buffer approach
- * because usually the caller shouldn't attempt offloaded copy any more (e.g.
- * calling copy_file_range(2)) after the first error, thus it should fall back
- * to a read+write path in the caller level.
- *
- * @src: Source child to copy data from
- * @src_offset: offset in @src image to read data
- * @dst: Destination child to copy data to
- * @dst_offset: offset in @dst image to write data
- * @bytes: number of bytes to copy
- * @flags: request flags. Supported flags:
- * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
- * write on @dst as if bdrv_co_pwrite_zeroes is
- * called. Used to simplify caller code, or
- * during BlockDriver.bdrv_co_copy_range_from()
- * recursion.
- * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
- * requests currently in flight.
- *
- * Returns: 0 if succeeded; negative error code if failed.
- **/
-int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
- BdrvChild *dst, int64_t dst_offset,
- int64_t bytes, BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
+#include "block-global-state.h"
+#include "block-io.h"
-void bdrv_cancel_in_flight(BlockDriverState *bs);
+/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
-#endif
+#endif /* BLOCK_H */
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
new file mode 100644
index 0000000000..8947abab76
--- /dev/null
+++ b/include/block/block_int-common.h
@@ -0,0 +1,1246 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_COMMON_H
+#define BLOCK_INT_COMMON_H
+
+#include "block/accounting.h"
+#include "block/block.h"
+#include "block/aio-wait.h"
+#include "qemu/queue.h"
+#include "qemu/coroutine.h"
+#include "qemu/stats64.h"
+#include "qemu/timer.h"
+#include "qemu/hbitmap.h"
+#include "block/snapshot.h"
+#include "qemu/throttle.h"
+#include "qemu/rcu.h"
+
+#define BLOCK_FLAG_LAZY_REFCOUNTS 8
+
+#define BLOCK_OPT_SIZE "size"
+#define BLOCK_OPT_ENCRYPT "encryption"
+#define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format"
+#define BLOCK_OPT_COMPAT6 "compat6"
+#define BLOCK_OPT_HWVERSION "hwversion"
+#define BLOCK_OPT_BACKING_FILE "backing_file"
+#define BLOCK_OPT_BACKING_FMT "backing_fmt"
+#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
+#define BLOCK_OPT_TABLE_SIZE "table_size"
+#define BLOCK_OPT_PREALLOC "preallocation"
+#define BLOCK_OPT_SUBFMT "subformat"
+#define BLOCK_OPT_COMPAT_LEVEL "compat"
+#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
+#define BLOCK_OPT_ADAPTER_TYPE "adapter_type"
+#define BLOCK_OPT_REDUNDANCY "redundancy"
+#define BLOCK_OPT_NOCOW "nocow"
+#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint"
+#define BLOCK_OPT_OBJECT_SIZE "object_size"
+#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits"
+#define BLOCK_OPT_DATA_FILE "data_file"
+#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw"
+#define BLOCK_OPT_COMPRESSION_TYPE "compression_type"
+#define BLOCK_OPT_EXTL2 "extended_l2"
+
+#define BLOCK_PROBE_BUF_SIZE 512
+
+enum BdrvTrackedRequestType {
+ BDRV_TRACKED_READ,
+ BDRV_TRACKED_WRITE,
+ BDRV_TRACKED_DISCARD,
+ BDRV_TRACKED_TRUNCATE,
+};
+
+/*
+ * It is not ideal that the BdrvTrackedRequest structure is public,
+ * as block/io.c is very careful about incoming offset/bytes being
+ * correct. Be sure to assert that bdrv_check_request() succeeded after
+ * any modification of a BdrvTrackedRequest object outside of block/io.c.
+ */
+typedef struct BdrvTrackedRequest {
+ BlockDriverState *bs;
+ int64_t offset;
+ int64_t bytes;
+ enum BdrvTrackedRequestType type;
+
+ bool serialising;
+ int64_t overlap_offset;
+ int64_t overlap_bytes;
+
+ QLIST_ENTRY(BdrvTrackedRequest) list;
+ Coroutine *co; /* owner, used for deadlock detection */
+ CoQueue wait_queue; /* coroutines blocked on this request */
+
+ struct BdrvTrackedRequest *waiting_for;
+} BdrvTrackedRequest;
+
+
+struct BlockDriver {
+ /*
+ * These fields are initialized when this object is created,
+ * and are never changed afterwards.
+ */
+
+ const char *format_name;
+ int instance_size;
+
+ /*
+ * Set to true if the BlockDriver is a block filter. Block filters pass
+ * certain callbacks that refer to data (see block.c) to their bs->file
+ * or bs->backing (whichever one exists) if the driver doesn't implement
+ * them. Drivers that do not wish to forward must implement them and return
+ * -ENOTSUP.
+ * Note that filters are not allowed to modify data.
+ *
+ * Filters generally cannot have more than a single filtered child,
+ * because the data they present must at all times be the same as
+ * that on their filtered child. That would be impossible to
+ * achieve for multiple filtered children.
+ * (And this filtered child must then be bs->file or bs->backing.)
+ */
+ bool is_filter;
+ /*
+ * Set to true if the BlockDriver is a format driver. Format nodes
+ * generally do not expect their children to be other format nodes
+ * (except for backing files), and so format probing is disabled
+ * on those children.
+ */
+ bool is_format;
+
+ /*
+ * Drivers implementing neither bdrv_parse_filename nor bdrv_open should
+ * have this field set to true, except ones that are defined only by their
+ * child's bs.
+ * An example of the latter is the quorum block driver.
+ */
+ bool bdrv_needs_filename;
+
+ /*
+ * Set if a driver can support backing files. This also implies the
+ * following semantics:
+ *
+ * - Return status 0 of .bdrv_co_block_status means that corresponding
+ * blocks are not allocated in this layer of backing-chain
+ * - For such (unallocated) blocks, read will:
+ * - fill buffer with zeros if there is no backing file
+ * - read from the backing file otherwise, where the block layer
+ * takes care of reading zeros beyond EOF if backing file is short
+ */
+ bool supports_backing;
+
+ bool has_variable_length;
+
+ /*
+ * Drivers setting this field must be able to work with just a plain
+ * filename with '<protocol_name>:' as a prefix, and no other options.
+ * Options may be extracted from the filename by implementing
+ * bdrv_parse_filename.
+ */
+ const char *protocol_name;
+
+ /* List of options for creating images, terminated by name == NULL */
+ QemuOptsList *create_opts;
+
+ /* List of options for image amend */
+ QemuOptsList *amend_opts;
+
+ /*
+ * If this driver supports reopening images this contains a
+ * NULL-terminated list of the runtime options that can be
+ * modified. If an option in this list is unspecified during
+ * reopen then it _must_ be reset to its default value or return
+ * an error.
+ */
+ const char *const *mutable_opts;
+
+ /*
+ * Pointer to a NULL-terminated array of names of strong options
+ * that can be specified for bdrv_open(). A strong option is one
+ * that changes the data of a BDS.
+ * If this pointer is NULL, the array is considered empty.
+ * "filename" and "driver" are always considered strong.
+ */
+ const char *const *strong_runtime_opts;
+
+
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+ /*
+ * This function is invoked under BQL before .bdrv_co_amend()
+ * (which in contrast does not necessarily run under the BQL)
+ * to allow driver-specific initialization code that requires
+ * the BQL, like setting up specific permission flags.
+ */
+ int (*bdrv_amend_pre_run)(BlockDriverState *bs, Error **errp);
+ /*
+ * This function is invoked under BQL after .bdrv_co_amend()
+ * to allow cleaning up what was done in .bdrv_amend_pre_run().
+ */
+ void (*bdrv_amend_clean)(BlockDriverState *bs);
+
+ /*
+ * Return true if @to_replace can be replaced by a BDS with the
+ * same data as @bs without it affecting @bs's behavior (that is,
+ * without it being visible to @bs's parents).
+ */
+ bool (*bdrv_recurse_can_replace)(BlockDriverState *bs,
+ BlockDriverState *to_replace);
+
+ int (*bdrv_probe_device)(const char *filename);
+
+ /*
+ * Any driver implementing this callback is expected to be able to handle
+ * NULL file names in its .bdrv_open() implementation.
+ */
+ void (*bdrv_parse_filename)(const char *filename, QDict *options,
+ Error **errp);
+
+ /* For handling image reopen for split or non-split files. */
+ int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+ void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
+ void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state);
+ void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+ void (*bdrv_join_options)(QDict *options, QDict *old_options);
+
+ int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp);
+
+ /* Protocol drivers should implement this instead of bdrv_open */
+ int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp);
+ void (*bdrv_close)(BlockDriverState *bs);
+
+ int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
+ Error **errp);
+ int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv,
+ const char *filename,
+ QemuOpts *opts,
+ Error **errp);
+
+ int (*bdrv_amend_options)(BlockDriverState *bs,
+ QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque,
+ bool force,
+ Error **errp);
+
+ int (*bdrv_make_empty)(BlockDriverState *bs);
+
+ /*
+ * Refreshes the bs->exact_filename field. If that is impossible,
+ * bs->exact_filename has to be left empty.
+ */
+ void (*bdrv_refresh_filename)(BlockDriverState *bs);
+
+ /*
+ * Gathers the open options for all children into @target.
+ * A simple format driver (without backing file support) might
+ * implement this function like this:
+ *
+ * QINCREF(bs->file->bs->full_open_options);
+ * qdict_put(target, "file", bs->file->bs->full_open_options);
+ *
+ * If not specified, the generic implementation will simply put
+ * all children's options under their respective name.
+ *
+ * @backing_overridden is true when bs->backing seems not to be
+ * the child that would result from opening bs->backing_file.
+ * Therefore, if it is true, the backing child's options should be
+ * gathered; otherwise, there is no need since the backing child
+ * is the one implied by the image header.
+ *
+ * Note that ideally this function would not be needed. Every
+ * block driver which implements it is probably doing something
+ * shady regarding its runtime option structure.
+ */
+ void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target,
+ bool backing_overridden);
+
+ /*
+ * Returns an allocated string which is the directory name of this BDS: It
+ * will be used to make relative filenames absolute by prepending this
+ * function's return value to them.
+ */
+ char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp);
+
+ /*
+ * This informs the driver that we are no longer interested in the result
+ * of in-flight requests, so don't waste the time if possible.
+ *
+ * One example usage is to avoid waiting for an nbd target node reconnect
+ * timeout during job-cancel with force=true.
+ */
+ void (*bdrv_cancel_in_flight)(BlockDriverState *bs);
+
+ int (*bdrv_inactivate)(BlockDriverState *bs);
+
+ int (*bdrv_snapshot_create)(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+ int (*bdrv_snapshot_goto)(BlockDriverState *bs,
+ const char *snapshot_id);
+ int (*bdrv_snapshot_delete)(BlockDriverState *bs,
+ const char *snapshot_id,
+ const char *name,
+ Error **errp);
+ int (*bdrv_snapshot_list)(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+ int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
+ const char *snapshot_id,
+ const char *name,
+ Error **errp);
+
+ int (*bdrv_change_backing_file)(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+
+ /* TODO Better pass an option string/QDict/QemuOpts to add any rule? */
+ int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
+ const char *tag);
+ int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
+ const char *tag);
+ int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
+ bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
+
+ void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
+
+ /*
+ * Returns 1 if newly created images are guaranteed to contain only
+ * zeros, 0 otherwise.
+ */
+ int (*bdrv_has_zero_init)(BlockDriverState *bs);
+
+ /*
+ * Remove fd handlers, timers, and other event loop callbacks so the event
+ * loop is no longer in use. Called with no in-flight requests and in
+ * depth-first traversal order with parents before child nodes.
+ */
+ void (*bdrv_detach_aio_context)(BlockDriverState *bs);
+
+ /*
+ * Add fd handlers, timers, and other event loop callbacks so I/O requests
+ * can be processed again. Called with no in-flight requests and in
+ * depth-first traversal order with child nodes before parent nodes.
+ */
+ void (*bdrv_attach_aio_context)(BlockDriverState *bs,
+ AioContext *new_context);
+
+ /**
+ * Try to get @bs's logical and physical block size.
+ * On success, store them in @bsz and return zero.
+ * On failure, return negative errno.
+ */
+ int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
+ /**
+ * Try to get @bs's geometry (cyls, heads, sectors)
+ * On success, store them in @geo and return 0.
+ * On failure return -errno.
+ * Only drivers that want to override guest geometry implement this
+ * callback; see hd_geometry_guess().
+ */
+ int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
+
+ void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
+ Error **errp);
+ void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
+ Error **errp);
+
+ /**
+ * Informs the block driver that a permission change is intended. The
+ * driver checks whether the change is permissible and may take other
+ * preparations for the change (e.g. get file system locks). This operation
+ * is always followed by a call to either .bdrv_set_perm or
+ * .bdrv_abort_perm_update.
+ *
+ * Checks whether the requested set of cumulative permissions in @perm
+ * can be granted for accessing @bs and whether no other users are using
+ * permissions other than those given in @shared (both arguments take
+ * BLK_PERM_* bitmasks).
+ *
+ * If both conditions are met, 0 is returned. Otherwise, -errno is returned
+ * and errp is set to an error describing the conflict.
+ */
+ int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
+ uint64_t shared, Error **errp);
+
+ /**
+ * Called to inform the driver that the cumulative set of used
+ * permissions for @bs has changed to @perm, and the set of sharable
+ * permissions to @shared. The driver can use this to propagate changes to
+ * its children (i.e. request permissions only if a parent actually needs
+ * them).
+ *
+ * This function is only invoked after bdrv_check_perm(), so block drivers
+ * may rely on preparations made in their .bdrv_check_perm implementation.
+ */
+ void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);
+
+ /*
+ * Called to inform the driver that after a previous bdrv_check_perm()
+ * call, the permission update is not performed and any preparations made
+ * for it (e.g. taken file locks) need to be undone.
+ *
+ * This function can be called even for nodes that never saw a
+ * bdrv_check_perm() call. It is a no-op then.
+ */
+ void (*bdrv_abort_perm_update)(BlockDriverState *bs);
+
+ /**
+ * Returns in @nperm and @nshared the permissions that the driver for @bs
+ * needs on its child @c, based on the cumulative permissions requested by
+ * the parents in @parent_perm and @parent_shared.
+ *
+ * If @c is NULL, return the permissions for attaching a new child for the
+ * given @child_class and @role.
+ *
+ * If @reopen_queue is non-NULL, don't return the currently needed
+ * permissions, but those that will be needed after applying the
+ * @reopen_queue.
+ */
+ void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
+ BdrvChildRole role,
+ BlockReopenQueue *reopen_queue,
+ uint64_t parent_perm, uint64_t parent_shared,
+ uint64_t *nperm, uint64_t *nshared);
+
+ /**
+ * Register/unregister a buffer for I/O. For example, when the driver is
+ * interested to know the memory areas that will later be used in iovs, so
+ * that it can do IOMMU mapping with VFIO etc., in order to get better
+ * performance. In the case of VFIO drivers, this callback is used to do
+ * DMA mapping for hot buffers.
+ */
+ void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
+ void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
+
+ /*
+ * This field is modified only under the BQL, and is part of
+ * the global state.
+ */
+ QLIST_ENTRY(BlockDriver) list;
+
+ /*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+ int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
+
+ int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs,
+ BlockdevAmendOptions *opts,
+ bool force,
+ Error **errp);
+
+ /* aio */
+ BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
+ BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
+ BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+ BlockCompletionFunc *cb, void *opaque);
+ BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
+ int64_t offset, int bytes,
+ BlockCompletionFunc *cb, void *opaque);
+
+ int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+
+ /**
+ * @offset: position in bytes to read at
+ * @bytes: number of bytes to read
+ * @qiov: the buffers to fill with read data
+ * @flags: currently unused, always 0
+ *
+ * @offset and @bytes will be a multiple of 'request_alignment',
+ * but the length of individual @qiov elements does not have to
+ * be a multiple.
+ *
+ * @bytes will always equal the total size of @qiov, and will be
+ * no larger than 'max_transfer'.
+ *
+ * The buffer in @qiov may point directly to guest memory.
+ */
+ int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+
+ int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags);
+
+ int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ int flags);
+ /**
+ * @offset: position in bytes to write at
+ * @bytes: number of bytes to write
+ * @qiov: the buffers containing data to write
+ * @flags: zero or more bits allowed by 'supported_write_flags'
+ *
+ * @offset and @bytes will be a multiple of 'request_alignment',
+ * but the length of individual @qiov elements does not have to
+ * be a multiple.
+ *
+ * @bytes will always equal the total size of @qiov, and will be
+ * no larger than 'max_transfer'.
+ *
+ * The buffer in @qiov may point directly to guest memory.
+ */
+ int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+ int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags);
+
+ /*
+ * Efficiently zero a region of the disk image. Typically an image format
+ * would use a compact metadata representation to implement this. This
+ * function pointer may be NULL or return -ENOTSUP and .bdrv_co_writev()
+ * will be called instead.
+ */
+ int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, BdrvRequestFlags flags);
+ int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes);
+
+ /*
+ * Map [offset, offset + bytes) range onto a child of @bs to copy from,
+ * and invoke bdrv_co_copy_range_from(child, ...), or invoke
+ * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
+ *
+ * See the comment of bdrv_co_copy_range for the parameter and return value
+ * semantics.
+ */
+ int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
+ BdrvChild *src,
+ int64_t offset,
+ BdrvChild *dst,
+ int64_t dst_offset,
+ int64_t bytes,
+ BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+ /*
+ * Map [offset, offset + bytes) range onto a child of bs to copy data to,
+ * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
+ * operation if @bs is the leaf and @src has the same BlockDriver. Return
+ * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
+ *
+ * See the comment of bdrv_co_copy_range for the parameter and return value
+ * semantics.
+ */
+ int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
+ BdrvChild *src,
+ int64_t src_offset,
+ BdrvChild *dst,
+ int64_t dst_offset,
+ int64_t bytes,
+ BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+ /*
+ * Building block for bdrv_block_status[_above] and
+ * bdrv_is_allocated[_above]. The driver should answer only
+ * according to the current layer, and should only need to set
+ * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
+ * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
+ * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See
+ * block.h for the overall meaning of the bits. As a hint, the
+ * flag want_zero is true if the caller cares more about precise
+ * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
+ * overall allocation (favor larger *pnum, perhaps by reporting
+ * _DATA instead of _ZERO). The block layer guarantees input
+ * clamped to bdrv_getlength() and aligned to request_alignment,
+ * as well as non-NULL pnum, map, and file; in turn, the driver
+ * must return an error or set pnum to an aligned non-zero value.
+ *
+ * Note that @bytes is just a hint on how big of a region the
+ * caller wants to inspect. It is not a limit on *pnum.
+ * Implementations are free to return larger values of *pnum if
+ * doing so does not incur a performance penalty.
+ *
+ * block/io.c's bdrv_co_block_status() will utilize an unclamped
+ * *pnum value for the block-status cache on protocol nodes, prior
+ * to clamping *pnum for return to its caller.
+ */
+ int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
+
+ /*
+ * Snapshot-access API.
+ *
+ * A block driver may provide a snapshot-access API: special functions to
+ * access some internal "snapshot". The functions are similar to the normal
+ * read/block_status/discard handlers, but get no special handling in the
+ * generic block layer: no serializing, no alignment, no tracked requests.
+ * So a block driver that implements these APIs is fully responsible for
+ * synchronization between snapshot-access API and normal IO requests.
+ *
+ * TODO: To be able to support qcow2's internal snapshots, this API will
+ * need to be extended to:
+ * - be able to select a specific snapshot
+ * - receive the snapshot's actual length (which may differ from bs's
+ * length)
+ */
+ int coroutine_fn (*bdrv_co_preadv_snapshot)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
+ int coroutine_fn (*bdrv_co_snapshot_block_status)(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
+ int coroutine_fn (*bdrv_co_pdiscard_snapshot)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes);
+
+ /*
+ * Invalidate any cached meta-data.
+ */
+ void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs,
+ Error **errp);
+
+ /*
+ * Flushes all data for all layers by calling bdrv_co_flush for underlying
+ * layers, if needed. This function is needed for deterministic
+ * synchronization of the flush finishing callback.
+ */
+ int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
+
+ /* Delete a created file. */
+ int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs,
+ Error **errp);
+
+ /*
+ * Flushes all data that was already written to the OS all the way down to
+ * the disk (for example file-posix.c calls fsync()).
+ */
+ int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
+
+ /*
+ * Flushes all internal caches to the OS. The data may still sit in a
+ * writeback cache of the host OS, but it will survive a crash of the qemu
+ * process.
+ */
+ int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
+
+ /*
+ * Truncate @bs to @offset bytes using the given @prealloc mode
+ * when growing. Modes other than PREALLOC_MODE_OFF should be
+ * rejected when shrinking @bs.
+ *
+ * If @exact is true, @bs must be resized to exactly @offset.
+ * Otherwise, it is sufficient for @bs (if it is a host block
+ * device and thus there is no way to resize it) to be at least
+ * @offset bytes in length.
+ *
+ * If @exact is true and this function fails but would succeed
+ * with @exact = false, it should return -ENOTSUP.
+ */
+ int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
+ bool exact, PreallocMode prealloc,
+ BdrvRequestFlags flags, Error **errp);
+ int64_t (*bdrv_getlength)(BlockDriverState *bs);
+ int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
+ BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs,
+ Error **errp);
+
+ int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ size_t qiov_offset);
+
+ int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
+
+ ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs,
+ Error **errp);
+ BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
+
+ int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
+ QEMUIOVector *qiov,
+ int64_t pos);
+ int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
+ QEMUIOVector *qiov,
+ int64_t pos);
+
+ /* removable device specific */
+ bool (*bdrv_is_inserted)(BlockDriverState *bs);
+ void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
+ void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
+
+ /* to control generic scsi devices */
+ BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockCompletionFunc *cb, void *opaque);
+ int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs,
+ unsigned long int req, void *buf);
+
+ /*
+ * Returns 0 for completed check, -errno for internal errors.
+ * The check results are stored in result.
+ */
+ int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs,
+ BdrvCheckResult *result,
+ BdrvCheckMode fix);
+
+ void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
+
+ /* io queue for linux-aio */
+ void (*bdrv_io_plug)(BlockDriverState *bs);
+ void (*bdrv_io_unplug)(BlockDriverState *bs);
+
+ /**
+ * bdrv_co_drain_begin is called if implemented at the beginning of a
+ * drain operation to drain and stop any internal sources of requests in
+ * the driver.
+ * bdrv_co_drain_end is called if implemented at the end of the drain.
+ *
+ * They should be used by the driver to e.g. manage scheduled I/O
+ * requests, or toggle an internal state. After the end of the drain new
+ * requests will continue normally.
+ */
+ void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
+ void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
+
+ bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
+ bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs,
+ const char *name,
+ uint32_t granularity,
+ Error **errp);
+ int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
+ const char *name,
+ Error **errp);
+};
+
+static inline bool block_driver_can_compress(BlockDriver *drv)
+{
+ return drv->bdrv_co_pwritev_compressed ||
+ drv->bdrv_co_pwritev_compressed_part;
+}
+
+typedef struct BlockLimits {
+ /*
+ * Alignment requirement, in bytes, for offset/length of I/O
+ * requests. Must be a power of 2 less than INT_MAX; defaults to
+ * 1 for drivers with modern byte interfaces, and to 512
+ * otherwise.
+ */
+ uint32_t request_alignment;
+
+ /*
+ * Maximum number of bytes that can be discarded at once. Must be multiple
+ * of pdiscard_alignment, but need not be power of 2. May be 0 if no
+ * inherent 64-bit limit.
+ */
+ int64_t max_pdiscard;
+
+ /*
+ * Optimal alignment for discard requests in bytes. A power of 2
+ * is best but not mandatory. Must be a multiple of
+ * bl.request_alignment, and must be less than max_pdiscard if
+ * that is set. May be 0 if bl.request_alignment is good enough
+ */
+ uint32_t pdiscard_alignment;
+
+ /*
+ * Maximum number of bytes that can be zeroed at once. Must be a
+ * multiple of pwrite_zeroes_alignment. 0 means no limit.
+ */
+ int64_t max_pwrite_zeroes;
+
+ /*
+ * Optimal alignment for write zeroes requests in bytes. A power
+ * of 2 is best but not mandatory. Must be a multiple of
+ * bl.request_alignment, and must be less than max_pwrite_zeroes
+ * if that is set. May be 0 if bl.request_alignment is good
+ * enough
+ */
+ uint32_t pwrite_zeroes_alignment;
+
+ /*
+ * Optimal transfer length in bytes. A power of 2 is best but not
+ * mandatory. Must be a multiple of bl.request_alignment, or 0 if
+ * no preferred size
+ */
+ uint32_t opt_transfer;
+
+ /*
+ * Maximal transfer length in bytes. Need not be power of 2, but
+ * must be multiple of opt_transfer and bl.request_alignment, or 0
+ * for no 32-bit limit. For now, anything larger than INT_MAX is
+ * clamped down.
+ */
+ uint32_t max_transfer;
+
+ /*
+ * Maximal hardware transfer length in bytes. Applies whenever
+ * transfers to the device bypass the kernel I/O scheduler, for
+ * example with SG_IO. If larger than max_transfer or if zero,
+ * blk_get_max_hw_transfer will fall back to max_transfer.
+ */
+ uint64_t max_hw_transfer;
+
+ /*
+ * Maximal number of scatter/gather elements allowed by the hardware.
+ * Applies whenever transfers to the device bypass the kernel I/O
+ * scheduler, for example with SG_IO. If larger than max_iov
+ * or if zero, blk_get_max_hw_iov will fall back to max_iov.
+ */
+ int max_hw_iov;
+
+
+ /* memory alignment, in bytes, so that no bounce buffer is needed */
+ size_t min_mem_alignment;
+
+ /* memory alignment, in bytes, for bounce buffer */
+ size_t opt_mem_alignment;
+
+ /* maximum number of iovec elements */
+ int max_iov;
+} BlockLimits;
+
+typedef struct BdrvOpBlocker BdrvOpBlocker;
+
+typedef struct BdrvAioNotifier {
+ void (*attached_aio_context)(AioContext *new_context, void *opaque);
+ void (*detach_aio_context)(void *opaque);
+
+ void *opaque;
+ bool deleted;
+
+ QLIST_ENTRY(BdrvAioNotifier) list;
+} BdrvAioNotifier;
+
+struct BdrvChildClass {
+ /*
+ * If true, bdrv_replace_node() doesn't change the node this BdrvChild
+ * points to.
+ */
+ bool stay_at_node;
+
+ /*
+ * If true, the parent is a BlockDriverState and bdrv_next_all_states()
+ * will return it. This information is used for drain_all, where every node
+ * will be drained separately, so the drain only needs to be propagated to
+ * non-BDS parents.
+ */
+ bool parent_is_bds;
+
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+ void (*inherit_options)(BdrvChildRole role, bool parent_is_format,
+ int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options);
+ void (*change_media)(BdrvChild *child, bool load);
+
+ /*
+ * Returns a malloced string that describes the parent of the child for a
+ * human reader. This could be a node-name, BlockBackend name, qdev ID or
+ * QOM path of the device owning the BlockBackend, job type and ID etc. The
+ * caller is responsible for freeing the memory.
+ */
+ char *(*get_parent_desc)(BdrvChild *child);
+
+ /*
+ * Notifies the parent that the child has been activated/inactivated (e.g.
+ * when migration is completing) and it can start/stop requesting
+ * permissions and doing I/O on it.
+ */
+ void (*activate)(BdrvChild *child, Error **errp);
+ int (*inactivate)(BdrvChild *child);
+
+ void (*attach)(BdrvChild *child);
+ void (*detach)(BdrvChild *child);
+
+ /*
+ * Notifies the parent that the filename of its child has changed (e.g.
+ * because the direct child was removed from the backing chain), so that it
+ * can update its reference.
+ */
+ int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
+ const char *filename, Error **errp);
+
+ bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
+ GSList **ignore, Error **errp);
+ void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
+
+ AioContext *(*get_parent_aio_context)(BdrvChild *child);
+
+ /*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+ void (*resize)(BdrvChild *child);
+
+ /*
+ * Returns a name that is supposedly more useful for human users than the
+ * node name for identifying the node in question (in particular, a BB
+ * name), or NULL if the parent can't provide a better name.
+ */
+ const char *(*get_name)(BdrvChild *child);
+
+ /*
+ * If this pair of functions is implemented, the parent doesn't issue new
+ * requests after returning from .drained_begin() until .drained_end() is
+ * called.
+ *
+ * These functions must not change the graph (and therefore also must not
+ * call aio_poll(), which could change the graph indirectly).
+ *
+ * If drained_end() schedules background operations, it must atomically
+ * increment *drained_end_counter for each such operation and atomically
+ * decrement it once the operation has settled.
+ *
+ * Note that this can be nested. If drained_begin() was called twice, new
+ * I/O is allowed only after drained_end() was called twice, too.
+ */
+ void (*drained_begin)(BdrvChild *child);
+ void (*drained_end)(BdrvChild *child, int *drained_end_counter);
+
+ /*
+ * Returns whether the parent has pending requests for the child. This
+ * callback is polled after .drained_begin() has been called until all
+ * activity on the child has stopped.
+ */
+ bool (*drained_poll)(BdrvChild *child);
+};
+
+extern const BdrvChildClass child_of_bds;
+
+struct BdrvChild {
+ BlockDriverState *bs;
+ char *name;
+ const BdrvChildClass *klass;
+ BdrvChildRole role;
+ void *opaque;
+
+ /**
+ * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
+ */
+ uint64_t perm;
+
+ /**
+ * Permissions that can still be granted to other users of @bs while this
+ * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
+ */
+ uint64_t shared_perm;
+
+ /*
+ * This link is frozen: the child can neither be replaced nor
+ * detached from the parent.
+ */
+ bool frozen;
+
+ /*
+ * How many times the parent of this child has been drained
+ * (through klass->drained_*).
+ * Usually, this is equal to bs->quiesce_counter (potentially
+ * reduced by bdrv_drain_all_count). It may differ while the
+ * child is entering or leaving a drained section.
+ */
+ int parent_quiesce_counter;
+
+ QLIST_ENTRY(BdrvChild) next;
+ QLIST_ENTRY(BdrvChild) next_parent;
+};
+
+/*
+ * Allows bdrv_co_block_status() to cache one data region for a
+ * protocol node.
+ *
+ * @valid: Whether the cache is valid (should be accessed with atomic
+ * functions so this can be reset by RCU readers)
+ * @data_start: Offset at which we know (or strongly assume) there is data
+ * @data_end: Offset where the data region ends (which is not necessarily
+ * the start of a zeroed region)
+ */
+typedef struct BdrvBlockStatusCache {
+ struct rcu_head rcu;
+
+ bool valid;
+ int64_t data_start;
+ int64_t data_end;
+} BdrvBlockStatusCache;
+
+struct BlockDriverState {
+ /*
+ * Protected by big QEMU lock or read-only after opening. No special
+ * locking needed during I/O...
+ */
+ int open_flags; /* flags used to open the file, re-used for re-open */
+ bool encrypted; /* if true, the media is encrypted */
+ bool sg; /* if true, the device is a /dev/sg* */
+ bool probed; /* if true, format was probed rather than specified */
+ bool force_share; /* if true, always allow all shared permissions */
+ bool implicit; /* if true, this filter node was automatically inserted */
+
+ BlockDriver *drv; /* NULL means no media */
+ void *opaque;
+
+ AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
+ /*
+ * long-running tasks intended to always use the same AioContext as this
+ * BDS may register themselves in this list to be notified of changes
+ * regarding this BDS's context
+ */
+ QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
+ bool walking_aio_notifiers; /* to make removal during iteration safe */
+
+ char filename[PATH_MAX];
+ /*
+ * If not empty, this image is a diff in relation to backing_file.
+ * Note that this is the name given in the image header and
+ * therefore may or may not be equal to .backing->bs->filename.
+ * If this field contains a relative path, it is to be resolved
+ * relatively to the overlay's location.
+ */
+ char backing_file[PATH_MAX];
+ /*
+ * The backing filename indicated by the image header. Contrary
+ * to backing_file, if we ever open this file, auto_backing_file
+ * is replaced by the resulting BDS's filename (i.e. after a
+ * bdrv_refresh_filename() run).
+ */
+ char auto_backing_file[PATH_MAX];
+ char backing_format[16]; /* if non-zero and backing_file exists */
+
+ QDict *full_open_options;
+ char exact_filename[PATH_MAX];
+
+ BdrvChild *backing;
+ BdrvChild *file;
+
+ /* I/O Limits */
+ BlockLimits bl;
+
+ /*
+ * Flags honored during pread
+ */
+ unsigned int supported_read_flags;
+ /*
+ * Flags honored during pwrite (so far: BDRV_REQ_FUA,
+ * BDRV_REQ_WRITE_UNCHANGED).
+ * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
+ * writes will be issued as normal writes without the flag set.
+ * This is important to note for drivers that do not explicitly
+ * request a WRITE permission for their children and instead take
+ * the same permissions as their parent did (this is commonly what
+ * block filters do). Such drivers have to be aware that the
+ * parent may have taken a WRITE_UNCHANGED permission only and is
+ * issuing such requests. Drivers either must make sure that
+ * these requests do not result in plain WRITE accesses (usually
+ * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
+ * every incoming write request as-is, including potentially that
+ * flag), or they have to explicitly take the WRITE permission for
+ * their children.
+ */
+ unsigned int supported_write_flags;
+ /*
+ * Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
+ * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED)
+ */
+ unsigned int supported_zero_flags;
+ /*
+ * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
+ *
+ * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure
+ * that any added space reads as all zeros. If this can't be guaranteed,
+ * the operation must fail.
+ */
+ unsigned int supported_truncate_flags;
+
+ /* the following member gives a name to every node on the bs graph. */
+ char node_name[32];
+ /* element of the list of named nodes building the graph */
+ QTAILQ_ENTRY(BlockDriverState) node_list;
+ /* element of the list of all BlockDriverStates (all_bdrv_states) */
+ QTAILQ_ENTRY(BlockDriverState) bs_list;
+ /* element of the list of monitor-owned BDS */
+ QTAILQ_ENTRY(BlockDriverState) monitor_list;
+ int refcnt;
+
+ /* operation blockers. Protected by BQL. */
+ QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
+
+ /*
+ * The node that this node inherited default options from (and a reopen on
+ * which can affect this node by changing these defaults). This is always a
+ * parent node of this node.
+ */
+ BlockDriverState *inherits_from;
+ QLIST_HEAD(, BdrvChild) children;
+ QLIST_HEAD(, BdrvChild) parents;
+
+ QDict *options;
+ QDict *explicit_options;
+ BlockdevDetectZeroesOptions detect_zeroes;
+
+ /* The error object in use for blocking operations on backing_hd */
+ Error *backing_blocker;
+
+ /* Protected by AioContext lock */
+
+ /*
+ * If we are reading a disk image, give its size in sectors.
+ * Generally read-only; it is written to by load_snapshot and
+ * save_snapshot, but the block layer is quiescent during those.
+ */
+ int64_t total_sectors;
+
+ /* threshold limit for writes, in bytes. "High water mark". */
+ uint64_t write_threshold_offset;
+
+ /*
+ * Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
+ * Reading from the list can be done with either the BQL or the
+ * dirty_bitmap_mutex. Modifying a bitmap only requires
+ * dirty_bitmap_mutex.
+ */
+ QemuMutex dirty_bitmap_mutex;
+ QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
+
+ /* Offset after the highest byte written to */
+ Stat64 wr_highest_offset;
+
+ /*
+ * If true, copy read backing sectors into image. Can be >1 if more
+ * than one client has requested copy-on-read. Accessed with atomic
+ * ops.
+ */
+ int copy_on_read;
+
+ /*
+ * number of in-flight requests; overall and serialising.
+ * Accessed with atomic ops.
+ */
+ unsigned int in_flight;
+ unsigned int serialising_in_flight;
+
+ /*
+ * counter for nested bdrv_io_plug.
+ * Accessed with atomic ops.
+ */
+ unsigned io_plugged;
+
+ /* do we need to tell the guest if we have a volatile write cache? */
+ int enable_write_cache;
+
+ /* Accessed with atomic ops. */
+ int quiesce_counter;
+ int recursive_quiesce_counter;
+
+ unsigned int write_gen; /* Current data generation */
+
+ /* Protected by reqs_lock. */
+ CoMutex reqs_lock;
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+ CoQueue flush_queue; /* Serializing flush queue */
+ bool active_flush_req; /* Flush request in flight? */
+
+ /* Only read/written by whoever has set active_flush_req to true. */
+ unsigned int flushed_gen; /* Flushed write generation */
+
+ /* BdrvChild links to this node may never be frozen */
+ bool never_freeze;
+
+ /* Lock for block-status cache RCU writers */
+ CoMutex bsc_modify_lock;
+ /* Always non-NULL, but must only be dereferenced under an RCU read guard */
+ BdrvBlockStatusCache *block_status_cache;
+};
+
+struct BlockBackendRootState {
+ int open_flags;
+ BlockdevDetectZeroesOptions detect_zeroes;
+};
+
+typedef enum BlockMirrorBackingMode {
+ /*
+ * Reuse the existing backing chain from the source for the target.
+ * - sync=full: Set backing BDS to NULL.
+ * - sync=top: Use source's backing BDS.
+ * - sync=none: Use source as the backing BDS.
+ */
+ MIRROR_SOURCE_BACKING_CHAIN,
+
+ /* Open the target's backing chain completely anew */
+ MIRROR_OPEN_BACKING_CHAIN,
+
+ /* Do not change the target's backing BDS after job completion */
+ MIRROR_LEAVE_BACKING_CHAIN,
+} BlockMirrorBackingMode;
+
+
+/*
+ * Essential block drivers which must always be statically linked into qemu, and
+ * which therefore can be accessed without using bdrv_find_format()
+ */
+extern BlockDriver bdrv_file;
+extern BlockDriver bdrv_raw;
+extern BlockDriver bdrv_qcow2;
+
+extern unsigned int bdrv_drain_all_count;
+extern QemuOptsList bdrv_create_opts_simple;
+
+/*
+ * Common functions that are neither I/O nor Global State.
+ *
+ * See include/block/block-common.h for more information about
+ * the Common API.
+ */
+
+static inline BlockDriverState *child_bs(BdrvChild *child)
+{
+ return child ? child->bs : NULL;
+}
+
+int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp);
+int get_tmp_filename(char *filename, int size);
+void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
+ QDict *options);
+
+
+int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ Error **errp);
+
+#ifdef _WIN32
+int is_windows_drive(const char *filename);
+#endif
+
+#endif /* BLOCK_INT_COMMON_H */
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
new file mode 100644
index 0000000000..0f21b0570b
--- /dev/null
+++ b/include/block/block_int-global-state.h
@@ -0,0 +1,329 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_GLOBAL_STATE_H
+#define BLOCK_INT_GLOBAL_STATE_H
+
+#include "block_int-common.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+/**
+ * stream_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Block device to operate on.
+ * @base: Block device that will become the new base, or %NULL to
+ * flatten the whole backing file chain onto @bs.
+ * @backing_file_str: The file name that will be written to @bs as the
+ * new backing file if the job completes. Ignored if @base is %NULL.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the stream job inserts into the graph above
+ * @bs. NULL means that a node name should be autogenerated.
+ * @errp: Error object.
+ *
+ * Start a streaming operation on @bs. Clusters that are unallocated
+ * in @bs, but allocated in any image between @base and @bs (both
+ * exclusive) will be written to @bs. At the end of a successful
+ * streaming job, the backing file of @bs will be changed to
+ * @backing_file_str in the written image and to @base in the live
+ * BlockDriverState.
+ */
+void stream_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, const char *backing_file_str,
+ BlockDriverState *bottom,
+ int creation_flags, int64_t speed,
+ BlockdevOnError on_error,
+ const char *filter_node_name,
+ Error **errp);
+
+/**
+ * commit_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Active block device.
+ * @top: Top block device to be committed.
+ * @base: Block device that will be written into, and become the new top.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @backing_file_str: String to use as the backing file in @top's overlay
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @top. NULL means
+ * that a node name should be autogenerated.
+ * @errp: Error object.
+ *
+ */
+void commit_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, BlockDriverState *top,
+ int creation_flags, int64_t speed,
+ BlockdevOnError on_error, const char *backing_file_str,
+ const char *filter_node_name, Error **errp);
+/**
+ * commit_active_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Active block device to be committed.
+ * @base: Block device that will be written into, and become the new top.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @auto_complete: Auto complete the job.
+ * @errp: Error object.
+ *
+ */
+BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, int creation_flags,
+ int64_t speed, BlockdevOnError on_error,
+ const char *filter_node_name,
+ BlockCompletionFunc *cb, void *opaque,
+ bool auto_complete, Error **errp);
+/*
+ * mirror_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @replaces: Block graph node name to replace once the mirror is done. Can
+ * only be used when full mirroring is selected.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @granularity: The chosen granularity for the dirty bitmap.
+ * @buf_size: The amount of data that can be in flight at one time.
+ * @mode: Whether to collapse all images in the chain to the target.
+ * @backing_mode: How to establish the target's backing chain after completion.
+ * @zero_target: Whether the target should be explicitly zero-initialized
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @unmap: Whether to unmap target where source sectors only contain zeroes.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the mirror job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
+ * @copy_mode: When to trigger writes to the target.
+ * @errp: Error object.
+ *
+ * Start a mirroring operation on @bs. Clusters that are allocated
+ * in @bs will be written to @target until the job is cancelled or
+ * manually completed. At the end of a successful mirroring job,
+ * @bs will be switched to read from @target.
+ */
+void mirror_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, const char *replaces,
+ int creation_flags, int64_t speed,
+ uint32_t granularity, int64_t buf_size,
+ MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
+ bool zero_target,
+ BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ bool unmap, const char *filter_node_name,
+ MirrorCopyMode copy_mode, Error **errp);
+
+/*
+ * backup_job_create:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @sync_mode: What parts of the disk image should be copied to the destination.
+ * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental'
+ * @bitmap_mode: The bitmap synchronization policy to use.
+ * @perf: Performance options. All actual fields assumed to be present,
+ * all ".has_*" fields are ignored.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @txn: Transaction that this job is part of (may be NULL).
+ *
+ * Create a backup operation on @bs. Clusters in @bs are written to @target
+ * until the job is cancelled or manually completed.
+ */
+BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, int64_t speed,
+ MirrorSyncMode sync_mode,
+ BdrvDirtyBitmap *sync_bitmap,
+ BitmapSyncMode bitmap_mode,
+ bool compress,
+ const char *filter_node_name,
+ BackupPerf *perf,
+ BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ int creation_flags,
+ BlockCompletionFunc *cb, void *opaque,
+ JobTxn *txn, Error **errp);
+
+BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ uint64_t perm, uint64_t shared_perm,
+ void *opaque, Error **errp);
+void bdrv_root_unref_child(BdrvChild *child);
+
+void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
+ uint64_t *shared_perm);
+
+/**
+ * Sets a BdrvChild's permissions. Avoid if the parent is a BDS; use
+ * bdrv_child_refresh_perms() instead and make the parent's
+ * .bdrv_child_perm() implementation return the correct values.
+ */
+int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+ Error **errp);
+
+/**
+ * Calls bs->drv->bdrv_child_perm() and updates the child's permission
+ * masks with the result.
+ * Drivers should invoke this function whenever an event occurs that
+ * makes their .bdrv_child_perm() implementation return different
+ * values than before, but which will not result in the block layer
+ * automatically refreshing the permissions.
+ */
+int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp);
+
+bool bdrv_recurse_can_replace(BlockDriverState *bs,
+ BlockDriverState *to_replace);
+
+/*
+ * Default implementation for BlockDriver.bdrv_child_perm() that can
+ * be used by block filters and image formats, as long as they use the
+ * child_of_bds child class and set an appropriate BdrvChildRole.
+ */
+void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
+ BdrvChildRole role, BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared);
+
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
+bool blk_dev_has_removable_media(BlockBackend *blk);
+void blk_dev_eject_request(BlockBackend *blk, bool force);
+bool blk_dev_is_medium_locked(BlockBackend *blk);
+
+void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
+
+void bdrv_set_monitor_owned(BlockDriverState *bs);
+
+void blockdev_close_all_bdrv_states(void);
+
+BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp);
+
+/**
+ * Simple implementation of bdrv_co_create_opts for protocol drivers
+ * which only support creation via opening a file
+ * (usually existing raw storage device)
+ */
+int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
+ const char *filename,
+ QemuOpts *opts,
+ Error **errp);
+
+BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
+ const char *name,
+ BlockDriverState **pbs,
+ Error **errp);
+BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
+ BlockDirtyBitmapMergeSourceList *bms,
+ HBitmap **backup, Error **errp);
+BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
+ bool release,
+ BlockDriverState **bitmap_bs,
+ Error **errp);
+
+
+BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs);
+
+/**
+ * bdrv_add_aio_context_notifier:
+ *
+ * If a long-running job intends to be always run in the same AioContext as a
+ * certain BDS, it may use this function to be notified of changes regarding the
+ * association of the BDS to an AioContext.
+ *
+ * attached_aio_context() is called after the target BDS has been attached to a
+ * new AioContext; detach_aio_context() is called before the target BDS is being
+ * detached from its old AioContext.
+ */
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+ void (*attached_aio_context)(AioContext *new_context, void *opaque),
+ void (*detach_aio_context)(void *opaque), void *opaque);
+
+/**
+ * bdrv_remove_aio_context_notifier:
+ *
+ * Unsubscribe from change notifications regarding the BDS's AioContext. The
+ * parameters given here have to be the same as those given to
+ * bdrv_add_aio_context_notifier().
+ */
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+ void (*aio_context_attached)(AioContext *,
+ void *),
+ void (*aio_context_detached)(void *),
+ void *opaque);
+
+/**
+ * End all quiescent sections started by bdrv_drain_all_begin(). This is
+ * needed when deleting a BDS before bdrv_drain_all_end() is called.
+ *
+ * NOTE: this is an internal helper for bdrv_close() *only*. No one else
+ * should call it.
+ */
+void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
+
+/**
+ * Make sure that the function is running under both drain and BQL.
+ * The latter protects against concurrent writes
+ * from the GS API, while the former prevents concurrent reads
+ * from I/O.
+ */
+static inline void assert_bdrv_graph_writable(BlockDriverState *bs)
+{
+ /*
+ * TODO: this function is incomplete. Because the users of this
+ * assert lack the necessary drains, check only for BQL.
+ * Once the necessary drains are added,
+ * assert also for qatomic_read(&bs->quiesce_counter) > 0
+ */
+ assert(qemu_in_main_thread());
+}
+
+#endif /* BLOCK_INT_GLOBAL_STATE_H */
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
new file mode 100644
index 0000000000..bb454200e5
--- /dev/null
+++ b/include/block/block_int-io.h
@@ -0,0 +1,194 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_IO_H
+#define BLOCK_INT_IO_H
+
+#include "block_int-common.h"
+
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+int coroutine_fn bdrv_co_preadv_snapshot(BdrvChild *child,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
+int coroutine_fn bdrv_co_snapshot_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
+int coroutine_fn bdrv_co_pdiscard_snapshot(BlockDriverState *bs,
+ int64_t offset, int64_t bytes);
+
+
+int coroutine_fn bdrv_co_preadv(BdrvChild *child,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
+
+static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
+ int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_CODE();
+
+ return bdrv_co_preadv(child, offset, bytes, &qiov, flags);
+}
+
+static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
+ int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_CODE();
+
+ return bdrv_co_pwritev(child, offset, bytes, &qiov, flags);
+}
+
+bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
+ uint64_t align);
+BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
+
+BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
+ const char *filename);
+
+/**
+ * bdrv_wakeup:
+ * @bs: The BlockDriverState for which an I/O operation has been completed.
+ *
+ * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During
+ * synchronous I/O on a BlockDriverState that is attached to another
+ * I/O thread, the main thread lets the I/O thread's event loop run,
+ * waiting for the I/O operation to complete. A bdrv_wakeup will wake
+ * up the main thread if necessary.
+ *
+ * Manual calls to bdrv_wakeup are rarely necessary, because
+ * bdrv_dec_in_flight already calls it.
+ */
+void bdrv_wakeup(BlockDriverState *bs);
+
+const char *bdrv_get_parent_name(const BlockDriverState *bs);
+bool blk_dev_has_tray(BlockBackend *blk);
+bool blk_dev_is_tray_open(BlockBackend *blk);
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
+
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
+bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
+ const BdrvDirtyBitmap *src,
+ HBitmap **backup, bool lock);
+
+void bdrv_inc_in_flight(BlockDriverState *bs);
+void bdrv_dec_in_flight(BlockDriverState *bs);
+
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes,
+ BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes,
+ BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+int refresh_total_sectors(BlockDriverState *bs, int64_t hint);
+
+BdrvChild *bdrv_cow_child(BlockDriverState *bs);
+BdrvChild *bdrv_filter_child(BlockDriverState *bs);
+BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs);
+BdrvChild *bdrv_primary_child(BlockDriverState *bs);
+BlockDriverState *bdrv_skip_filters(BlockDriverState *bs);
+BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs);
+
+static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs)
+{
+ IO_CODE();
+ return child_bs(bdrv_cow_child(bs));
+}
+
+static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs)
+{
+ IO_CODE();
+ return child_bs(bdrv_filter_child(bs));
+}
+
+static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs)
+{
+ IO_CODE();
+ return child_bs(bdrv_filter_or_cow_child(bs));
+}
+
+static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
+{
+ IO_CODE();
+ return child_bs(bdrv_primary_child(bs));
+}
+
+/**
+ * Check whether the given offset is in the cached block-status data
+ * region.
+ *
+ * If it is, and @pnum is not NULL, *pnum is set to
+ * `bsc.data_end - offset`, i.e. how many bytes, starting from
+ * @offset, are data (according to the cache).
+ * Otherwise, *pnum is not touched.
+ */
+bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum);
+
+/**
+ * If [offset, offset + bytes) overlaps with the currently cached
+ * block-status region, invalidate the cache.
+ *
+ * (To be used by I/O paths that cause data regions to be zero or
+ * holes.)
+ */
+void bdrv_bsc_invalidate_range(BlockDriverState *bs,
+ int64_t offset, int64_t bytes);
+
+/**
+ * Mark the range [offset, offset + bytes) as a data region.
+ */
+void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
+
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * See include/block/block-io.h for more information about
+ * the "I/O or GS" API.
+ */
+
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
+
+#endif /* BLOCK_INT_IO_H */
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 27008cfb22..7d50b6bbd1 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -24,1478 +24,9 @@
#ifndef BLOCK_INT_H
#define BLOCK_INT_H
-#include "block/accounting.h"
-#include "block/block.h"
-#include "block/aio-wait.h"
-#include "qemu/queue.h"
-#include "qemu/coroutine.h"
-#include "qemu/stats64.h"
-#include "qemu/timer.h"
-#include "qemu/hbitmap.h"
-#include "block/snapshot.h"
-#include "qemu/throttle.h"
-#include "qemu/rcu.h"
+#include "block_int-global-state.h"
+#include "block_int-io.h"
-#define BLOCK_FLAG_LAZY_REFCOUNTS 8
-
-#define BLOCK_OPT_SIZE "size"
-#define BLOCK_OPT_ENCRYPT "encryption"
-#define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format"
-#define BLOCK_OPT_COMPAT6 "compat6"
-#define BLOCK_OPT_HWVERSION "hwversion"
-#define BLOCK_OPT_BACKING_FILE "backing_file"
-#define BLOCK_OPT_BACKING_FMT "backing_fmt"
-#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
-#define BLOCK_OPT_TABLE_SIZE "table_size"
-#define BLOCK_OPT_PREALLOC "preallocation"
-#define BLOCK_OPT_SUBFMT "subformat"
-#define BLOCK_OPT_COMPAT_LEVEL "compat"
-#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
-#define BLOCK_OPT_ADAPTER_TYPE "adapter_type"
-#define BLOCK_OPT_REDUNDANCY "redundancy"
-#define BLOCK_OPT_NOCOW "nocow"
-#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint"
-#define BLOCK_OPT_OBJECT_SIZE "object_size"
-#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits"
-#define BLOCK_OPT_DATA_FILE "data_file"
-#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw"
-#define BLOCK_OPT_COMPRESSION_TYPE "compression_type"
-#define BLOCK_OPT_EXTL2 "extended_l2"
-
-#define BLOCK_PROBE_BUF_SIZE 512
-
-enum BdrvTrackedRequestType {
- BDRV_TRACKED_READ,
- BDRV_TRACKED_WRITE,
- BDRV_TRACKED_DISCARD,
- BDRV_TRACKED_TRUNCATE,
-};
-
-/*
- * That is not quite good that BdrvTrackedRequest structure is public,
- * as block/io.c is very careful about incoming offset/bytes being
- * correct. Be sure to assert bdrv_check_request() succeeded after any
- * modification of BdrvTrackedRequest object out of block/io.c
- */
-typedef struct BdrvTrackedRequest {
- BlockDriverState *bs;
- int64_t offset;
- int64_t bytes;
- enum BdrvTrackedRequestType type;
-
- bool serialising;
- int64_t overlap_offset;
- int64_t overlap_bytes;
-
- QLIST_ENTRY(BdrvTrackedRequest) list;
- Coroutine *co; /* owner, used for deadlock detection */
- CoQueue wait_queue; /* coroutines blocked on this request */
-
- struct BdrvTrackedRequest *waiting_for;
-} BdrvTrackedRequest;
-
-int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset,
- Error **errp);
-int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp);
-
-struct BlockDriver {
- const char *format_name;
- int instance_size;
-
- /* set to true if the BlockDriver is a block filter. Block filters pass
- * certain callbacks that refer to data (see block.c) to their bs->file
- * or bs->backing (whichever one exists) if the driver doesn't implement
- * them. Drivers that do not wish to forward must implement them and return
- * -ENOTSUP.
- * Note that filters are not allowed to modify data.
- *
- * Filters generally cannot have more than a single filtered child,
- * because the data they present must at all times be the same as
- * that on their filtered child. That would be impossible to
- * achieve for multiple filtered children.
- * (And this filtered child must then be bs->file or bs->backing.)
- */
- bool is_filter;
- /*
- * Set to true if the BlockDriver is a format driver. Format nodes
- * generally do not expect their children to be other format nodes
- * (except for backing files), and so format probing is disabled
- * on those children.
- */
- bool is_format;
- /*
- * Return true if @to_replace can be replaced by a BDS with the
- * same data as @bs without it affecting @bs's behavior (that is,
- * without it being visible to @bs's parents).
- */
- bool (*bdrv_recurse_can_replace)(BlockDriverState *bs,
- BlockDriverState *to_replace);
-
- int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
- int (*bdrv_probe_device)(const char *filename);
-
- /* Any driver implementing this callback is expected to be able to handle
- * NULL file names in its .bdrv_open() implementation */
- void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
- /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have
- * this field set to true, except ones that are defined only by their
- * child's bs.
- * An example of the last type will be the quorum block driver.
- */
- bool bdrv_needs_filename;
-
- /*
- * Set if a driver can support backing files. This also implies the
- * following semantics:
- *
- * - Return status 0 of .bdrv_co_block_status means that corresponding
- * blocks are not allocated in this layer of backing-chain
- * - For such (unallocated) blocks, read will:
- * - fill buffer with zeros if there is no backing file
- * - read from the backing file otherwise, where the block layer
- * takes care of reading zeros beyond EOF if backing file is short
- */
- bool supports_backing;
-
- /* For handling image reopen for split or non-split files */
- int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
- BlockReopenQueue *queue, Error **errp);
- void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
- void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state);
- void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
- void (*bdrv_join_options)(QDict *options, QDict *old_options);
-
- int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
- Error **errp);
-
- /* Protocol drivers should implement this instead of bdrv_open */
- int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
- Error **errp);
- void (*bdrv_close)(BlockDriverState *bs);
-
-
- int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
- Error **errp);
- int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp);
-
- int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs,
- BlockdevAmendOptions *opts,
- bool force,
- Error **errp);
-
- int (*bdrv_amend_options)(BlockDriverState *bs,
- QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb,
- void *cb_opaque,
- bool force,
- Error **errp);
-
- int (*bdrv_make_empty)(BlockDriverState *bs);
-
- /*
- * Refreshes the bs->exact_filename field. If that is impossible,
- * bs->exact_filename has to be left empty.
- */
- void (*bdrv_refresh_filename)(BlockDriverState *bs);
-
- /*
- * Gathers the open options for all children into @target.
- * A simple format driver (without backing file support) might
- * implement this function like this:
- *
- * QINCREF(bs->file->bs->full_open_options);
- * qdict_put(target, "file", bs->file->bs->full_open_options);
- *
- * If not specified, the generic implementation will simply put
- * all children's options under their respective name.
- *
- * @backing_overridden is true when bs->backing seems not to be
- * the child that would result from opening bs->backing_file.
- * Therefore, if it is true, the backing child's options should be
- * gathered; otherwise, there is no need since the backing child
- * is the one implied by the image header.
- *
- * Note that ideally this function would not be needed. Every
- * block driver which implements it is probably doing something
- * shady regarding its runtime option structure.
- */
- void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target,
- bool backing_overridden);
-
- /*
- * Returns an allocated string which is the directory name of this BDS: It
- * will be used to make relative filenames absolute by prepending this
- * function's return value to them.
- */
- char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp);
-
- /* aio */
- BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
- BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
- int64_t offset, int bytes,
- BlockCompletionFunc *cb, void *opaque);
-
- int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
-
- /**
- * @offset: position in bytes to read at
- * @bytes: number of bytes to read
- * @qiov: the buffers to fill with read data
- * @flags: currently unused, always 0
- *
- * @offset and @bytes will be a multiple of 'request_alignment',
- * but the length of individual @qiov elements does not have to
- * be a multiple.
- *
- * @bytes will always equal the total size of @qiov, and will be
- * no larger than 'max_transfer'.
- *
- * The buffer in @qiov may point directly to guest memory.
- */
- int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
- /**
- * @offset: position in bytes to write at
- * @bytes: number of bytes to write
- * @qiov: the buffers containing data to write
- * @flags: zero or more bits allowed by 'supported_write_flags'
- *
- * @offset and @bytes will be a multiple of 'request_alignment',
- * but the length of individual @qiov elements does not have to
- * be a multiple.
- *
- * @bytes will always equal the total size of @qiov, and will be
- * no larger than 'max_transfer'.
- *
- * The buffer in @qiov may point directly to guest memory.
- */
- int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags);
-
- /*
- * Efficiently zero a region of the disk image. Typically an image format
- * would use a compact metadata representation to implement this. This
- * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev()
- * will be called instead.
- */
- int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
- int64_t offset, int64_t bytes);
-
- /* Map [offset, offset + nbytes) range onto a child of @bs to copy from,
- * and invoke bdrv_co_copy_range_from(child, ...), or invoke
- * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
- *
- * See the comment of bdrv_co_copy_range for the parameter and return value
- * semantics.
- */
- int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
- BdrvChild *src,
- int64_t offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-
- /* Map [offset, offset + nbytes) range onto a child of bs to copy data to,
- * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
- * operation if @bs is the leaf and @src has the same BlockDriver. Return
- * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
- *
- * See the comment of bdrv_co_copy_range for the parameter and return value
- * semantics.
- */
- int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
- BdrvChild *src,
- int64_t src_offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-
- /*
- * Building block for bdrv_block_status[_above] and
- * bdrv_is_allocated[_above]. The driver should answer only
- * according to the current layer, and should only need to set
- * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
- * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
- * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See
- * block.h for the overall meaning of the bits. As a hint, the
- * flag want_zero is true if the caller cares more about precise
- * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
- * overall allocation (favor larger *pnum, perhaps by reporting
- * _DATA instead of _ZERO). The block layer guarantees input
- * clamped to bdrv_getlength() and aligned to request_alignment,
- * as well as non-NULL pnum, map, and file; in turn, the driver
- * must return an error or set pnum to an aligned non-zero value.
- *
- * Note that @bytes is just a hint on how big of a region the
- * caller wants to inspect. It is not a limit on *pnum.
- * Implementations are free to return larger values of *pnum if
- * doing so does not incur a performance penalty.
- *
- * block/io.c's bdrv_co_block_status() will utilize an unclamped
- * *pnum value for the block-status cache on protocol nodes, prior
- * to clamping *pnum for return to its caller.
- */
- int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
- bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
- int64_t *map, BlockDriverState **file);
-
- /*
- * This informs the driver that we are no longer interested in the result
- * of in-flight requests, so don't waste the time if possible.
- *
- * One example usage is to avoid waiting for an nbd target node reconnect
- * timeout during job-cancel with force=true.
- */
- void (*bdrv_cancel_in_flight)(BlockDriverState *bs);
-
- /*
- * Invalidate any cached meta-data.
- */
- void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs,
- Error **errp);
- int (*bdrv_inactivate)(BlockDriverState *bs);
-
- /*
- * Flushes all data for all layers by calling bdrv_co_flush for underlying
- * layers, if needed. This function is needed for deterministic
- * synchronization of the flush finishing callback.
- */
- int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
-
- /* Delete a created file. */
- int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs,
- Error **errp);
-
- /*
- * Flushes all data that was already written to the OS all the way down to
- * the disk (for example file-posix.c calls fsync()).
- */
- int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
-
- /*
- * Flushes all internal caches to the OS. The data may still sit in a
- * writeback cache of the host OS, but it will survive a crash of the qemu
- * process.
- */
- int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
-
- /*
- * Drivers setting this field must be able to work with just a plain
- * filename with '<protocol_name>:' as a prefix, and no other options.
- * Options may be extracted from the filename by implementing
- * bdrv_parse_filename.
- */
- const char *protocol_name;
-
- /*
- * Truncate @bs to @offset bytes using the given @prealloc mode
- * when growing. Modes other than PREALLOC_MODE_OFF should be
- * rejected when shrinking @bs.
- *
- * If @exact is true, @bs must be resized to exactly @offset.
- * Otherwise, it is sufficient for @bs (if it is a host block
- * device and thus there is no way to resize it) to be at least
- * @offset bytes in length.
- *
- * If @exact is true and this function fails but would succeed
- * with @exact = false, it should return -ENOTSUP.
- */
- int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
- bool exact, PreallocMode prealloc,
- BdrvRequestFlags flags, Error **errp);
-
- int64_t (*bdrv_getlength)(BlockDriverState *bs);
- bool has_variable_length;
- int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
- BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs,
- Error **errp);
-
- int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov);
- int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
-
- int (*bdrv_snapshot_create)(BlockDriverState *bs,
- QEMUSnapshotInfo *sn_info);
- int (*bdrv_snapshot_goto)(BlockDriverState *bs,
- const char *snapshot_id);
- int (*bdrv_snapshot_delete)(BlockDriverState *bs,
- const char *snapshot_id,
- const char *name,
- Error **errp);
- int (*bdrv_snapshot_list)(BlockDriverState *bs,
- QEMUSnapshotInfo **psn_info);
- int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
- const char *snapshot_id,
- const char *name,
- Error **errp);
- int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
- ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs,
- Error **errp);
- BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
-
- int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
- QEMUIOVector *qiov,
- int64_t pos);
- int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
- QEMUIOVector *qiov,
- int64_t pos);
-
- int (*bdrv_change_backing_file)(BlockDriverState *bs,
- const char *backing_file, const char *backing_fmt);
-
- /* removable device specific */
- bool (*bdrv_is_inserted)(BlockDriverState *bs);
- void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
- void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
-
- /* to control generic scsi devices */
- BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
- unsigned long int req, void *buf,
- BlockCompletionFunc *cb, void *opaque);
- int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs,
- unsigned long int req, void *buf);
-
- /* List of options for creating images, terminated by name == NULL */
- QemuOptsList *create_opts;
-
- /* List of options for image amend */
- QemuOptsList *amend_opts;
-
- /*
- * If this driver supports reopening images this contains a
- * NULL-terminated list of the runtime options that can be
- * modified. If an option in this list is unspecified during
- * reopen then it _must_ be reset to its default value or return
- * an error.
- */
- const char *const *mutable_opts;
-
- /*
- * Returns 0 for completed check, -errno for internal errors.
- * The check results are stored in result.
- */
- int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs,
- BdrvCheckResult *result,
- BdrvCheckMode fix);
-
- void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
-
- /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
- int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
- const char *tag);
- int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
- const char *tag);
- int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
- bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
-
- void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
-
- /*
- * Returns 1 if newly created images are guaranteed to contain only
- * zeros, 0 otherwise.
- */
- int (*bdrv_has_zero_init)(BlockDriverState *bs);
-
- /* Remove fd handlers, timers, and other event loop callbacks so the event
- * loop is no longer in use. Called with no in-flight requests and in
- * depth-first traversal order with parents before child nodes.
- */
- void (*bdrv_detach_aio_context)(BlockDriverState *bs);
-
- /* Add fd handlers, timers, and other event loop callbacks so I/O requests
- * can be processed again. Called with no in-flight requests and in
- * depth-first traversal order with child nodes before parent nodes.
- */
- void (*bdrv_attach_aio_context)(BlockDriverState *bs,
- AioContext *new_context);
-
- /* io queue for linux-aio */
- void (*bdrv_io_plug)(BlockDriverState *bs);
- void (*bdrv_io_unplug)(BlockDriverState *bs);
-
- /**
- * Try to get @bs's logical and physical block size.
- * On success, store them in @bsz and return zero.
- * On failure, return negative errno.
- */
- int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
- /**
- * Try to get @bs's geometry (cyls, heads, sectors)
- * On success, store them in @geo and return 0.
- * On failure return -errno.
- * Only drivers that want to override guest geometry implement this
- * callback; see hd_geometry_guess().
- */
- int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
-
- /**
- * bdrv_co_drain_begin is called if implemented in the beginning of a
- * drain operation to drain and stop any internal sources of requests in
- * the driver.
- * bdrv_co_drain_end is called if implemented at the end of the drain.
- *
- * They should be used by the driver to e.g. manage scheduled I/O
- * requests, or toggle an internal state. After the end of the drain new
- * requests will continue normally.
- */
- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
-
- void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
- Error **errp);
- void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
- Error **errp);
-
- /**
- * Informs the block driver that a permission change is intended. The
- * driver checks whether the change is permissible and may take other
- * preparations for the change (e.g. get file system locks). This operation
- * is always followed either by a call to either .bdrv_set_perm or
- * .bdrv_abort_perm_update.
- *
- * Checks whether the requested set of cumulative permissions in @perm
- * can be granted for accessing @bs and whether no other users are using
- * permissions other than those given in @shared (both arguments take
- * BLK_PERM_* bitmasks).
- *
- * If both conditions are met, 0 is returned. Otherwise, -errno is returned
- * and errp is set to an error describing the conflict.
- */
- int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
- uint64_t shared, Error **errp);
-
- /**
- * Called to inform the driver that the set of cumulative set of used
- * permissions for @bs has changed to @perm, and the set of sharable
- * permission to @shared. The driver can use this to propagate changes to
- * its children (i.e. request permissions only if a parent actually needs
- * them).
- *
- * This function is only invoked after bdrv_check_perm(), so block drivers
- * may rely on preparations made in their .bdrv_check_perm implementation.
- */
- void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);
-
- /*
- * Called to inform the driver that after a previous bdrv_check_perm()
- * call, the permission update is not performed and any preparations made
- * for it (e.g. taken file locks) need to be undone.
- *
- * This function can be called even for nodes that never saw a
- * bdrv_check_perm() call. It is a no-op then.
- */
- void (*bdrv_abort_perm_update)(BlockDriverState *bs);
-
- /**
- * Returns in @nperm and @nshared the permissions that the driver for @bs
- * needs on its child @c, based on the cumulative permissions requested by
- * the parents in @parent_perm and @parent_shared.
- *
- * If @c is NULL, return the permissions for attaching a new child for the
- * given @child_class and @role.
- *
- * If @reopen_queue is non-NULL, don't return the currently needed
- * permissions, but those that will be needed after applying the
- * @reopen_queue.
- */
- void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
- BdrvChildRole role,
- BlockReopenQueue *reopen_queue,
- uint64_t parent_perm, uint64_t parent_shared,
- uint64_t *nperm, uint64_t *nshared);
-
- bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
- bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs,
- const char *name,
- uint32_t granularity,
- Error **errp);
- int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
- const char *name,
- Error **errp);
-
- /**
- * Register/unregister a buffer for I/O. For example, when the driver is
- * interested to know the memory areas that will later be used in iovs, so
- * that it can do IOMMU mapping with VFIO etc., in order to get better
- * performance. In the case of VFIO drivers, this callback is used to do
- * DMA mapping for hot buffers.
- */
- void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
- void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
- QLIST_ENTRY(BlockDriver) list;
-
- /* Pointer to a NULL-terminated array of names of strong options
- * that can be specified for bdrv_open(). A strong option is one
- * that changes the data of a BDS.
- * If this pointer is NULL, the array is considered empty.
- * "filename" and "driver" are always considered strong. */
- const char *const *strong_runtime_opts;
-};
-
-static inline bool block_driver_can_compress(BlockDriver *drv)
-{
- return drv->bdrv_co_pwritev_compressed ||
- drv->bdrv_co_pwritev_compressed_part;
-}
-
-typedef struct BlockLimits {
- /* Alignment requirement, in bytes, for offset/length of I/O
- * requests. Must be a power of 2 less than INT_MAX; defaults to
- * 1 for drivers with modern byte interfaces, and to 512
- * otherwise. */
- uint32_t request_alignment;
-
- /*
- * Maximum number of bytes that can be discarded at once. Must be multiple
- * of pdiscard_alignment, but need not be power of 2. May be 0 if no
- * inherent 64-bit limit.
- */
- int64_t max_pdiscard;
-
- /* Optimal alignment for discard requests in bytes. A power of 2
- * is best but not mandatory. Must be a multiple of
- * bl.request_alignment, and must be less than max_pdiscard if
- * that is set. May be 0 if bl.request_alignment is good enough */
- uint32_t pdiscard_alignment;
-
- /*
- * Maximum number of bytes that can zeroized at once. Must be multiple of
- * pwrite_zeroes_alignment. 0 means no limit.
- */
- int64_t max_pwrite_zeroes;
-
- /* Optimal alignment for write zeroes requests in bytes. A power
- * of 2 is best but not mandatory. Must be a multiple of
- * bl.request_alignment, and must be less than max_pwrite_zeroes
- * if that is set. May be 0 if bl.request_alignment is good
- * enough */
- uint32_t pwrite_zeroes_alignment;
-
- /* Optimal transfer length in bytes. A power of 2 is best but not
- * mandatory. Must be a multiple of bl.request_alignment, or 0 if
- * no preferred size */
- uint32_t opt_transfer;
-
- /* Maximal transfer length in bytes. Need not be power of 2, but
- * must be multiple of opt_transfer and bl.request_alignment, or 0
- * for no 32-bit limit. For now, anything larger than INT_MAX is
- * clamped down. */
- uint32_t max_transfer;
-
- /* Maximal hardware transfer length in bytes. Applies whenever
- * transfers to the device bypass the kernel I/O scheduler, for
- * example with SG_IO. If larger than max_transfer or if zero,
- * blk_get_max_hw_transfer will fall back to max_transfer.
- */
- uint64_t max_hw_transfer;
-
- /* Maximal number of scatter/gather elements allowed by the hardware.
- * Applies whenever transfers to the device bypass the kernel I/O
- * scheduler, for example with SG_IO. If larger than max_iov
- * or if zero, blk_get_max_hw_iov will fall back to max_iov.
- */
- int max_hw_iov;
-
- /* memory alignment, in bytes so that no bounce buffer is needed */
- size_t min_mem_alignment;
-
- /* memory alignment, in bytes, for bounce buffer */
- size_t opt_mem_alignment;
-
- /* maximum number of iovec elements */
- int max_iov;
-} BlockLimits;
-
-typedef struct BdrvOpBlocker BdrvOpBlocker;
-
-typedef struct BdrvAioNotifier {
- void (*attached_aio_context)(AioContext *new_context, void *opaque);
- void (*detach_aio_context)(void *opaque);
-
- void *opaque;
- bool deleted;
-
- QLIST_ENTRY(BdrvAioNotifier) list;
-} BdrvAioNotifier;
-
-struct BdrvChildClass {
- /* If true, bdrv_replace_node() doesn't change the node this BdrvChild
- * points to. */
- bool stay_at_node;
-
- /* If true, the parent is a BlockDriverState and bdrv_next_all_states()
- * will return it. This information is used for drain_all, where every node
- * will be drained separately, so the drain only needs to be propagated to
- * non-BDS parents. */
- bool parent_is_bds;
-
- void (*inherit_options)(BdrvChildRole role, bool parent_is_format,
- int *child_flags, QDict *child_options,
- int parent_flags, QDict *parent_options);
-
- void (*change_media)(BdrvChild *child, bool load);
- void (*resize)(BdrvChild *child);
-
- /* Returns a name that is supposedly more useful for human users than the
- * node name for identifying the node in question (in particular, a BB
- * name), or NULL if the parent can't provide a better name. */
- const char *(*get_name)(BdrvChild *child);
-
- /* Returns a malloced string that describes the parent of the child for a
- * human reader. This could be a node-name, BlockBackend name, qdev ID or
- * QOM path of the device owning the BlockBackend, job type and ID etc. The
- * caller is responsible for freeing the memory. */
- char *(*get_parent_desc)(BdrvChild *child);
-
- /*
- * If this pair of functions is implemented, the parent doesn't issue new
- * requests after returning from .drained_begin() until .drained_end() is
- * called.
- *
- * These functions must not change the graph (and therefore also must not
- * call aio_poll(), which could change the graph indirectly).
- *
- * If drained_end() schedules background operations, it must atomically
- * increment *drained_end_counter for each such operation and atomically
- * decrement it once the operation has settled.
- *
- * Note that this can be nested. If drained_begin() was called twice, new
- * I/O is allowed only after drained_end() was called twice, too.
- */
- void (*drained_begin)(BdrvChild *child);
- void (*drained_end)(BdrvChild *child, int *drained_end_counter);
-
- /*
- * Returns whether the parent has pending requests for the child. This
- * callback is polled after .drained_begin() has been called until all
- * activity on the child has stopped.
- */
- bool (*drained_poll)(BdrvChild *child);
-
- /* Notifies the parent that the child has been activated/inactivated (e.g.
- * when migration is completing) and it can start/stop requesting
- * permissions and doing I/O on it. */
- void (*activate)(BdrvChild *child, Error **errp);
- int (*inactivate)(BdrvChild *child);
-
- void (*attach)(BdrvChild *child);
- void (*detach)(BdrvChild *child);
-
- /* Notifies the parent that the filename of its child has changed (e.g.
- * because the direct child was removed from the backing chain), so that it
- * can update its reference. */
- int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
- const char *filename, Error **errp);
-
- bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
- GSList **ignore, Error **errp);
- void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
-
- AioContext *(*get_parent_aio_context)(BdrvChild *child);
-};
-
-extern const BdrvChildClass child_of_bds;
-
-struct BdrvChild {
- BlockDriverState *bs;
- char *name;
- const BdrvChildClass *klass;
- BdrvChildRole role;
- void *opaque;
-
- /**
- * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
- */
- uint64_t perm;
-
- /**
- * Permissions that can still be granted to other users of @bs while this
- * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
- */
- uint64_t shared_perm;
-
- /*
- * This link is frozen: the child can neither be replaced nor
- * detached from the parent.
- */
- bool frozen;
-
- /*
- * How many times the parent of this child has been drained
- * (through klass->drained_*).
- * Usually, this is equal to bs->quiesce_counter (potentially
- * reduced by bdrv_drain_all_count). It may differ while the
- * child is entering or leaving a drained section.
- */
- int parent_quiesce_counter;
-
- QLIST_ENTRY(BdrvChild) next;
- QLIST_ENTRY(BdrvChild) next_parent;
-};
-
-/*
- * Allows bdrv_co_block_status() to cache one data region for a
- * protocol node.
- *
- * @valid: Whether the cache is valid (should be accessed with atomic
- * functions so this can be reset by RCU readers)
- * @data_start: Offset where we know (or strongly assume) is data
- * @data_end: Offset where the data region ends (which is not necessarily
- * the start of a zeroed region)
- */
-typedef struct BdrvBlockStatusCache {
- struct rcu_head rcu;
-
- bool valid;
- int64_t data_start;
- int64_t data_end;
-} BdrvBlockStatusCache;
-
-struct BlockDriverState {
- /* Protected by big QEMU lock or read-only after opening. No special
- * locking needed during I/O...
- */
- int open_flags; /* flags used to open the file, re-used for re-open */
- bool encrypted; /* if true, the media is encrypted */
- bool sg; /* if true, the device is a /dev/sg* */
- bool probed; /* if true, format was probed rather than specified */
- bool force_share; /* if true, always allow all shared permissions */
- bool implicit; /* if true, this filter node was automatically inserted */
-
- BlockDriver *drv; /* NULL means no media */
- void *opaque;
-
- AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
- /* long-running tasks intended to always use the same AioContext as this
- * BDS may register themselves in this list to be notified of changes
- * regarding this BDS's context */
- QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
- bool walking_aio_notifiers; /* to make removal during iteration safe */
-
- char filename[PATH_MAX];
- /*
- * If not empty, this image is a diff in relation to backing_file.
- * Note that this is the name given in the image header and
- * therefore may or may not be equal to .backing->bs->filename.
- * If this field contains a relative path, it is to be resolved
- * relatively to the overlay's location.
- */
- char backing_file[PATH_MAX];
- /*
- * The backing filename indicated by the image header. Contrary
- * to backing_file, if we ever open this file, auto_backing_file
- * is replaced by the resulting BDS's filename (i.e. after a
- * bdrv_refresh_filename() run).
- */
- char auto_backing_file[PATH_MAX];
- char backing_format[16]; /* if non-zero and backing_file exists */
-
- QDict *full_open_options;
- char exact_filename[PATH_MAX];
-
- BdrvChild *backing;
- BdrvChild *file;
-
- /* I/O Limits */
- BlockLimits bl;
-
- /*
- * Flags honored during pread
- */
- unsigned int supported_read_flags;
- /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
- * BDRV_REQ_WRITE_UNCHANGED).
- * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
- * writes will be issued as normal writes without the flag set.
- * This is important to note for drivers that do not explicitly
- * request a WRITE permission for their children and instead take
- * the same permissions as their parent did (this is commonly what
- * block filters do). Such drivers have to be aware that the
- * parent may have taken a WRITE_UNCHANGED permission only and is
- * issuing such requests. Drivers either must make sure that
- * these requests do not result in plain WRITE accesses (usually
- * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
- * every incoming write request as-is, including potentially that
- * flag), or they have to explicitly take the WRITE permission for
- * their children. */
- unsigned int supported_write_flags;
- /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
- * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
- unsigned int supported_zero_flags;
- /*
- * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
- *
- * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure
- * that any added space reads as all zeros. If this can't be guaranteed,
- * the operation must fail.
- */
- unsigned int supported_truncate_flags;
-
- /* the following member gives a name to every node on the bs graph. */
- char node_name[32];
- /* element of the list of named nodes building the graph */
- QTAILQ_ENTRY(BlockDriverState) node_list;
- /* element of the list of all BlockDriverStates (all_bdrv_states) */
- QTAILQ_ENTRY(BlockDriverState) bs_list;
- /* element of the list of monitor-owned BDS */
- QTAILQ_ENTRY(BlockDriverState) monitor_list;
- int refcnt;
-
- /* operation blockers */
- QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
-
- /* The node that this node inherited default options from (and a reopen on
- * which can affect this node by changing these defaults). This is always a
- * parent node of this node. */
- BlockDriverState *inherits_from;
- QLIST_HEAD(, BdrvChild) children;
- QLIST_HEAD(, BdrvChild) parents;
-
- QDict *options;
- QDict *explicit_options;
- BlockdevDetectZeroesOptions detect_zeroes;
-
- /* The error object in use for blocking operations on backing_hd */
- Error *backing_blocker;
-
- /* Protected by AioContext lock */
-
- /* If we are reading a disk image, give its size in sectors.
- * Generally read-only; it is written to by load_snapshot and
- * save_snaphost, but the block layer is quiescent during those.
- */
- int64_t total_sectors;
-
- /* threshold limit for writes, in bytes. "High water mark". */
- uint64_t write_threshold_offset;
-
- /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
- * Reading from the list can be done with either the BQL or the
- * dirty_bitmap_mutex. Modifying a bitmap only requires
- * dirty_bitmap_mutex. */
- QemuMutex dirty_bitmap_mutex;
- QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
-
- /* Offset after the highest byte written to */
- Stat64 wr_highest_offset;
-
- /* If true, copy read backing sectors into image. Can be >1 if more
- * than one client has requested copy-on-read. Accessed with atomic
- * ops.
- */
- int copy_on_read;
-
- /* number of in-flight requests; overall and serialising.
- * Accessed with atomic ops.
- */
- unsigned int in_flight;
- unsigned int serialising_in_flight;
-
- /* counter for nested bdrv_io_plug.
- * Accessed with atomic ops.
- */
- unsigned io_plugged;
-
- /* do we need to tell the quest if we have a volatile write cache? */
- int enable_write_cache;
-
- /* Accessed with atomic ops. */
- int quiesce_counter;
- int recursive_quiesce_counter;
-
- unsigned int write_gen; /* Current data generation */
-
- /* Protected by reqs_lock. */
- CoMutex reqs_lock;
- QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
- CoQueue flush_queue; /* Serializing flush queue */
- bool active_flush_req; /* Flush request in flight? */
-
- /* Only read/written by whoever has set active_flush_req to true. */
- unsigned int flushed_gen; /* Flushed write generation */
-
- /* BdrvChild links to this node may never be frozen */
- bool never_freeze;
-
- /* Lock for block-status cache RCU writers */
- CoMutex bsc_modify_lock;
- /* Always non-NULL, but must only be dereferenced under an RCU read guard */
- BdrvBlockStatusCache *block_status_cache;
-};
-
-struct BlockBackendRootState {
- int open_flags;
- BlockdevDetectZeroesOptions detect_zeroes;
-};
-
-typedef enum BlockMirrorBackingMode {
- /* Reuse the existing backing chain from the source for the target.
- * - sync=full: Set backing BDS to NULL.
- * - sync=top: Use source's backing BDS.
- * - sync=none: Use source as the backing BDS. */
- MIRROR_SOURCE_BACKING_CHAIN,
-
- /* Open the target's backing chain completely anew */
- MIRROR_OPEN_BACKING_CHAIN,
-
- /* Do not change the target's backing BDS after job completion */
- MIRROR_LEAVE_BACKING_CHAIN,
-} BlockMirrorBackingMode;
-
-
-/* Essential block drivers which must always be statically linked into qemu, and
- * which therefore can be accessed without using bdrv_find_format() */
-extern BlockDriver bdrv_file;
-extern BlockDriver bdrv_raw;
-extern BlockDriver bdrv_qcow2;
-
-int coroutine_fn bdrv_co_preadv(BdrvChild *child,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
-
-static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
- int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
-{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
- return bdrv_co_preadv(child, offset, bytes, &qiov, flags);
-}
-
-static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
- int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
-{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
- return bdrv_co_pwritev(child, offset, bytes, &qiov, flags);
-}
-
-extern unsigned int bdrv_drain_all_count;
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
-
-bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
- uint64_t align);
-BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
-
-int get_tmp_filename(char *filename, int size);
-BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
- const char *filename);
-
-void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
- QDict *options);
-
-/**
- * bdrv_add_aio_context_notifier:
- *
- * If a long-running job intends to be always run in the same AioContext as a
- * certain BDS, it may use this function to be notified of changes regarding the
- * association of the BDS to an AioContext.
- *
- * attached_aio_context() is called after the target BDS has been attached to a
- * new AioContext; detach_aio_context() is called before the target BDS is being
- * detached from its old AioContext.
- */
-void bdrv_add_aio_context_notifier(BlockDriverState *bs,
- void (*attached_aio_context)(AioContext *new_context, void *opaque),
- void (*detach_aio_context)(void *opaque), void *opaque);
-
-/**
- * bdrv_remove_aio_context_notifier:
- *
- * Unsubscribe of change notifications regarding the BDS's AioContext. The
- * parameters given here have to be the same as those given to
- * bdrv_add_aio_context_notifier().
- */
-void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
- void (*aio_context_attached)(AioContext *,
- void *),
- void (*aio_context_detached)(void *),
- void *opaque);
-
-/**
- * bdrv_wakeup:
- * @bs: The BlockDriverState for which an I/O operation has been completed.
- *
- * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During
- * synchronous I/O on a BlockDriverState that is attached to another
- * I/O thread, the main thread lets the I/O thread's event loop run,
- * waiting for the I/O operation to complete. A bdrv_wakeup will wake
- * up the main thread if necessary.
- *
- * Manual calls to bdrv_wakeup are rarely necessary, because
- * bdrv_dec_in_flight already calls it.
- */
-void bdrv_wakeup(BlockDriverState *bs);
-
-#ifdef _WIN32
-int is_windows_drive(const char *filename);
-#endif
-
-/**
- * stream_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Block device to operate on.
- * @base: Block device that will become the new base, or %NULL to
- * flatten the whole backing file chain onto @bs.
- * @backing_file_str: The file name that will be written to @bs as the
- * the new backing file if the job completes. Ignored if @base is %NULL.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the stream job inserts into the graph above
- * @bs. NULL means that a node name should be autogenerated.
- * @errp: Error object.
- *
- * Start a streaming operation on @bs. Clusters that are unallocated
- * in @bs, but allocated in any image between @base and @bs (both
- * exclusive) will be written to @bs. At the end of a successful
- * streaming job, the backing file of @bs will be changed to
- * @backing_file_str in the written image and to @base in the live
- * BlockDriverState.
- */
-void stream_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *base, const char *backing_file_str,
- BlockDriverState *bottom,
- int creation_flags, int64_t speed,
- BlockdevOnError on_error,
- const char *filter_node_name,
- Error **errp);
-
-/**
- * commit_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Active block device.
- * @top: Top block device to be committed.
- * @base: Block device that will be written into, and become the new top.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @backing_file_str: String to use as the backing file in @top's overlay
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the commit job inserts into the graph above @top. NULL means
- * that a node name should be autogenerated.
- * @errp: Error object.
- *
- */
-void commit_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *base, BlockDriverState *top,
- int creation_flags, int64_t speed,
- BlockdevOnError on_error, const char *backing_file_str,
- const char *filter_node_name, Error **errp);
-/**
- * commit_active_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Active block device to be committed.
- * @base: Block device that will be written into, and become the new top.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the commit job inserts into the graph above @bs. NULL means that
- * a node name should be autogenerated.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @auto_complete: Auto complete the job.
- * @errp: Error object.
- *
- */
-BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *base, int creation_flags,
- int64_t speed, BlockdevOnError on_error,
- const char *filter_node_name,
- BlockCompletionFunc *cb, void *opaque,
- bool auto_complete, Error **errp);
-/*
- * mirror_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Block device to operate on.
- * @target: Block device to write to.
- * @replaces: Block graph node name to replace once the mirror is done. Can
- * only be used when full mirroring is selected.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @granularity: The chosen granularity for the dirty bitmap.
- * @buf_size: The amount of data that can be in flight at one time.
- * @mode: Whether to collapse all images in the chain to the target.
- * @backing_mode: How to establish the target's backing chain after completion.
- * @zero_target: Whether the target should be explicitly zero-initialized
- * @on_source_error: The action to take upon error reading from the source.
- * @on_target_error: The action to take upon error writing to the target.
- * @unmap: Whether to unmap target where source sectors only contain zeroes.
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the mirror job inserts into the graph above @bs. NULL means that
- * a node name should be autogenerated.
- * @copy_mode: When to trigger writes to the target.
- * @errp: Error object.
- *
- * Start a mirroring operation on @bs. Clusters that are allocated
- * in @bs will be written to @target until the job is cancelled or
- * manually completed. At the end of a successful mirroring job,
- * @bs will be switched to read from @target.
- */
-void mirror_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *target, const char *replaces,
- int creation_flags, int64_t speed,
- uint32_t granularity, int64_t buf_size,
- MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
- bool zero_target,
- BlockdevOnError on_source_error,
- BlockdevOnError on_target_error,
- bool unmap, const char *filter_node_name,
- MirrorCopyMode copy_mode, Error **errp);
-
-/*
- * backup_job_create:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Block device to operate on.
- * @target: Block device to write to.
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @sync_mode: What parts of the disk image should be copied to the destination.
- * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental'
- * @bitmap_mode: The bitmap synchronization policy to use.
- * @perf: Performance options. All actual fields assumed to be present,
- * all ".has_*" fields are ignored.
- * @on_source_error: The action to take upon error reading from the source.
- * @on_target_error: The action to take upon error writing to the target.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @txn: Transaction that this job is part of (may be NULL).
- *
- * Create a backup operation on @bs. Clusters in @bs are written to @target
- * until the job is cancelled or manually completed.
- */
-BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
- BlockDriverState *target, int64_t speed,
- MirrorSyncMode sync_mode,
- BdrvDirtyBitmap *sync_bitmap,
- BitmapSyncMode bitmap_mode,
- bool compress,
- const char *filter_node_name,
- BackupPerf *perf,
- BlockdevOnError on_source_error,
- BlockdevOnError on_target_error,
- int creation_flags,
- BlockCompletionFunc *cb, void *opaque,
- JobTxn *txn, Error **errp);
-
-BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
- const char *child_name,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- uint64_t perm, uint64_t shared_perm,
- void *opaque, Error **errp);
-void bdrv_root_unref_child(BdrvChild *child);
-
-void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
- uint64_t *shared_perm);
-
-/**
- * Sets a BdrvChild's permissions. Avoid if the parent is a BDS; use
- * bdrv_child_refresh_perms() instead and make the parent's
- * .bdrv_child_perm() implementation return the correct values.
- */
-int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
- Error **errp);
-
-/**
- * Calls bs->drv->bdrv_child_perm() and updates the child's permission
- * masks with the result.
- * Drivers should invoke this function whenever an event occurs that
- * makes their .bdrv_child_perm() implementation return different
- * values than before, but which will not result in the block layer
- * automatically refreshing the permissions.
- */
-int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp);
-
-bool bdrv_recurse_can_replace(BlockDriverState *bs,
- BlockDriverState *to_replace);
-
-/*
- * Default implementation for BlockDriver.bdrv_child_perm() that can
- * be used by block filters and image formats, as long as they use the
- * child_of_bds child class and set an appropriate BdrvChildRole.
- */
-void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
- BdrvChildRole role, BlockReopenQueue *reopen_queue,
- uint64_t perm, uint64_t shared,
- uint64_t *nperm, uint64_t *nshared);
-
-const char *bdrv_get_parent_name(const BlockDriverState *bs);
-void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
-bool blk_dev_has_removable_media(BlockBackend *blk);
-bool blk_dev_has_tray(BlockBackend *blk);
-void blk_dev_eject_request(BlockBackend *blk, bool force);
-bool blk_dev_is_tray_open(BlockBackend *blk);
-bool blk_dev_is_medium_locked(BlockBackend *blk);
-
-void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
-
-void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
-void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
-bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
- const BdrvDirtyBitmap *src,
- HBitmap **backup, bool lock);
-
-void bdrv_inc_in_flight(BlockDriverState *bs);
-void bdrv_dec_in_flight(BlockDriverState *bs);
-
-void blockdev_close_all_bdrv_states(void);
-
-int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
- BdrvChild *dst, int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
- BdrvChild *dst, int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-
-int refresh_total_sectors(BlockDriverState *bs, int64_t hint);
-
-void bdrv_set_monitor_owned(BlockDriverState *bs);
-BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp);
-
-/**
- * Simple implementation of bdrv_co_create_opts for protocol drivers
- * which only support creation via opening a file
- * (usually existing raw storage device)
- */
-int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp);
-extern QemuOptsList bdrv_create_opts_simple;
-
-BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
- const char *name,
- BlockDriverState **pbs,
- Error **errp);
-BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
- BlockDirtyBitmapMergeSourceList *bms,
- HBitmap **backup, Error **errp);
-BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
- bool release,
- BlockDriverState **bitmap_bs,
- Error **errp);
-
-BdrvChild *bdrv_cow_child(BlockDriverState *bs);
-BdrvChild *bdrv_filter_child(BlockDriverState *bs);
-BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs);
-BdrvChild *bdrv_primary_child(BlockDriverState *bs);
-BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs);
-BlockDriverState *bdrv_skip_filters(BlockDriverState *bs);
-BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs);
-
-static inline BlockDriverState *child_bs(BdrvChild *child)
-{
- return child ? child->bs : NULL;
-}
-
-static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs)
-{
- return child_bs(bdrv_cow_child(bs));
-}
-
-static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs)
-{
- return child_bs(bdrv_filter_child(bs));
-}
-
-static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs)
-{
- return child_bs(bdrv_filter_or_cow_child(bs));
-}
-
-static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
-{
- return child_bs(bdrv_primary_child(bs));
-}
-
-/**
- * End all quiescent sections started by bdrv_drain_all_begin(). This is
- * needed when deleting a BDS before bdrv_drain_all_end() is called.
- *
- * NOTE: this is an internal helper for bdrv_close() *only*. No one else
- * should call it.
- */
-void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
-
-/**
- * Check whether the given offset is in the cached block-status data
- * region.
- *
- * If it is, and @pnum is not NULL, *pnum is set to
- * `bsc.data_end - offset`, i.e. how many bytes, starting from
- * @offset, are data (according to the cache).
- * Otherwise, *pnum is not touched.
- */
-bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum);
-
-/**
- * If [offset, offset + bytes) overlaps with the currently cached
- * block-status region, invalidate the cache.
- *
- * (To be used by I/O paths that cause data regions to be zero or
- * holes.)
- */
-void bdrv_bsc_invalidate_range(BlockDriverState *bs,
- int64_t offset, int64_t bytes);
-
-/**
- * Mark the range [offset, offset + bytes) as a data region.
- */
-void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
+/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
#endif /* BLOCK_INT_H */
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 87fbb3985f..6525e16fd5 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -74,6 +74,13 @@ typedef struct BlockJob {
GSList *nodes;
} BlockJob;
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
/**
* block_job_next:
* @job: A block job, or %NULL.
@@ -155,6 +162,21 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp);
*/
void block_job_iostatus_reset(BlockJob *job);
+/*
+ * block_job_get_aio_context:
+ *
+ * Returns aio context associated with a block job.
+ */
+AioContext *block_job_get_aio_context(BlockJob *job);
+
+
+/*
+ * Common functions that are neither I/O nor Global State.
+ *
+ * See include/block/block-common.h for more information about
+ * the Common API.
+ */
+
/**
* block_job_is_internal:
* @job: The job to determine if it is user-visible or not.
@@ -170,11 +192,4 @@ bool block_job_is_internal(BlockJob *job);
*/
const BlockJobDriver *block_job_driver(BlockJob *job);
-/*
- * block_job_get_aio_context:
- *
- * Returns aio context associated with a block job.
- */
-AioContext *block_job_get_aio_context(BlockJob *job);
-
#endif
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 6633d83da2..6bd9ae2b20 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -39,6 +39,13 @@ struct BlockJobDriver {
JobDriver job_driver;
/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+ /*
* Returns whether the job has pending requests for the child or will
* submit new requests before the next pause point. This callback is polled
* in the context of draining a job node after requesting that the job be
@@ -47,6 +54,13 @@ struct BlockJobDriver {
bool (*drained_poll)(BlockJob *job);
/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+ /*
* If the callback is not NULL, it will be invoked before the job is
* resumed in a new AioContext. This is the place to move any resources
* besides job->blk to the new AioContext.
@@ -56,6 +70,13 @@ struct BlockJobDriver {
void (*set_speed)(BlockJob *job, int64_t speed);
};
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
/**
* block_job_create:
* @job_id: The id of the newly-created job, or %NULL to have one
@@ -98,6 +119,13 @@ void block_job_free(Job *job);
*/
void block_job_user_resume(Job *job);
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
/**
* block_job_ratelimit_get_delay:
*
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 40950ae3d5..6528336c4c 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -77,7 +77,7 @@ void bdrv_dirty_bitmap_set_persistence(BdrvDirtyBitmap *bitmap,
bool persistent);
void bdrv_dirty_bitmap_set_inconsistent(BdrvDirtyBitmap *bitmap);
void bdrv_dirty_bitmap_set_busy(BdrvDirtyBitmap *bitmap, bool busy);
-void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
+bool bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
HBitmap **backup, Error **errp);
void bdrv_dirty_bitmap_skip_store(BdrvDirtyBitmap *bitmap, bool skip);
bool bdrv_dirty_bitmap_get(BdrvDirtyBitmap *bitmap, int64_t offset);
@@ -115,6 +115,8 @@ int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, int64_t offset,
bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap,
int64_t start, int64_t end, int64_t max_dirty_count,
int64_t *dirty_start, int64_t *dirty_count);
+bool bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap, int64_t offset,
+ int64_t bytes, int64_t *count);
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
Error **errp);
diff --git a/include/block/nvme.h b/include/block/nvme.h
index cd068ac891..3737351cc8 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -695,7 +695,8 @@ typedef struct QEMU_PACKED NvmeRwCmd {
uint8_t flags;
uint16_t cid;
uint32_t nsid;
- uint64_t rsvd2;
+ uint32_t cdw2;
+ uint32_t cdw3;
uint64_t mptr;
NvmeCmdDptr dptr;
uint64_t slba;
@@ -731,7 +732,6 @@ enum {
NVME_RW_PRINFO_PRCHK_APP = 1 << 11,
NVME_RW_PRINFO_PRCHK_REF = 1 << 10,
NVME_RW_PRINFO_PRCHK_MASK = 7 << 10,
-
};
#define NVME_RW_PRINFO(control) ((control >> 10) & 0xf)
@@ -770,6 +770,7 @@ typedef struct QEMU_PACKED NvmeDsmRange {
enum {
NVME_COPY_FORMAT_0 = 0x0,
+ NVME_COPY_FORMAT_1 = 0x1,
};
typedef struct QEMU_PACKED NvmeCopyCmd {
@@ -777,7 +778,9 @@ typedef struct QEMU_PACKED NvmeCopyCmd {
uint8_t flags;
uint16_t cid;
uint32_t nsid;
- uint32_t rsvd2[4];
+ uint32_t cdw2;
+ uint32_t cdw3;
+ uint32_t rsvd2[2];
NvmeCmdDptr dptr;
uint64_t sdlba;
uint8_t nr;
@@ -789,7 +792,7 @@ typedef struct QEMU_PACKED NvmeCopyCmd {
uint16_t appmask;
} NvmeCopyCmd;
-typedef struct QEMU_PACKED NvmeCopySourceRange {
+typedef struct QEMU_PACKED NvmeCopySourceRangeFormat0 {
uint8_t rsvd0[8];
uint64_t slba;
uint16_t nlb;
@@ -797,7 +800,17 @@ typedef struct QEMU_PACKED NvmeCopySourceRange {
uint32_t reftag;
uint16_t apptag;
uint16_t appmask;
-} NvmeCopySourceRange;
+} NvmeCopySourceRangeFormat0;
+
+typedef struct QEMU_PACKED NvmeCopySourceRangeFormat1 {
+ uint8_t rsvd0[8];
+ uint64_t slba;
+ uint16_t nlb;
+ uint8_t rsvd18[8];
+ uint8_t sr[10];
+ uint16_t apptag;
+ uint16_t appmask;
+} NvmeCopySourceRangeFormat1;
enum NvmeAsyncEventRequest {
NVME_AER_TYPE_ERROR = 0,
@@ -908,6 +921,7 @@ enum NvmeStatusCodes {
NVME_CMP_FAILURE = 0x0285,
NVME_ACCESS_DENIED = 0x0286,
NVME_DULB = 0x0287,
+ NVME_E2E_STORAGE_TAG_ERROR = 0x0288,
NVME_MORE = 0x2000,
NVME_DNR = 0x4000,
NVME_NO_COMPLETE = 0xffff,
@@ -1111,6 +1125,10 @@ enum NvmeIdCtrlOaes {
NVME_OAES_NS_ATTR = 1 << 8,
};
+enum NvmeIdCtrlCtratt {
+ NVME_CTRATT_ELBAS = 1 << 15,
+};
+
enum NvmeIdCtrlOacs {
NVME_OACS_SECURITY = 1 << 0,
NVME_OACS_FORMAT = 1 << 1,
@@ -1131,7 +1149,8 @@ enum NvmeIdCtrlOncs {
};
enum NvmeIdCtrlOcfs {
- NVME_OCFS_COPY_FORMAT_0 = 1 << 0,
+ NVME_OCFS_COPY_FORMAT_0 = 1 << NVME_COPY_FORMAT_0,
+ NVME_OCFS_COPY_FORMAT_1 = 1 << NVME_COPY_FORMAT_1,
};
enum NvmeIdctrlVwc {
@@ -1216,6 +1235,7 @@ enum NvmeFeatureIds {
NVME_WRITE_ATOMICITY = 0xa,
NVME_ASYNCHRONOUS_EVENT_CONF = 0xb,
NVME_TIMESTAMP = 0xe,
+ NVME_HOST_BEHAVIOR_SUPPORT = 0x16,
NVME_COMMAND_SET_PROFILE = 0x19,
NVME_SOFTWARE_PROGRESS_MARKER = 0x80,
NVME_FID_MAX = 0x100,
@@ -1257,6 +1277,13 @@ typedef struct QEMU_PACKED NvmeRangeType {
uint8_t rsvd48[16];
} NvmeRangeType;
+typedef struct NvmeHostBehaviorSupport {
+ uint8_t acre;
+ uint8_t etdas;
+ uint8_t lbafee;
+ uint8_t rsvd3[509];
+} NvmeHostBehaviorSupport;
+
typedef struct QEMU_PACKED NvmeLBAF {
uint16_t ms;
uint8_t ds;
@@ -1270,6 +1297,7 @@ typedef struct QEMU_PACKED NvmeLBAFE {
} NvmeLBAFE;
#define NVME_NSID_BROADCAST 0xffffffff
+#define NVME_MAX_NLBAF 64
typedef struct QEMU_PACKED NvmeIdNs {
uint64_t nsze;
@@ -1304,11 +1332,20 @@ typedef struct QEMU_PACKED NvmeIdNs {
uint8_t rsvd81[23];
uint8_t nguid[16];
uint64_t eui64;
- NvmeLBAF lbaf[16];
- uint8_t rsvd192[192];
+ NvmeLBAF lbaf[NVME_MAX_NLBAF];
uint8_t vs[3712];
} NvmeIdNs;
+#define NVME_ID_NS_NVM_ELBAF_PIF(elbaf) (((elbaf) >> 7) & 0x3)
+
+typedef struct QEMU_PACKED NvmeIdNsNvm {
+ uint64_t lbstm;
+ uint8_t pic;
+ uint8_t rsvd9[3];
+ uint32_t elbaf[NVME_MAX_NLBAF];
+ uint8_t rsvd268[3828];
+} NvmeIdNsNvm;
+
typedef struct QEMU_PACKED NvmeIdNsDescr {
uint8_t nidt;
uint8_t nidl;
@@ -1410,10 +1447,23 @@ enum NvmeIdNsMc {
#define NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK)
-typedef struct NvmeDifTuple {
- uint16_t guard;
- uint16_t apptag;
- uint32_t reftag;
+enum NvmePIFormat {
+ NVME_PI_GUARD_16 = 0,
+ NVME_PI_GUARD_64 = 2,
+};
+
+typedef union NvmeDifTuple {
+ struct {
+ uint16_t guard;
+ uint16_t apptag;
+ uint32_t reftag;
+ } g16;
+
+ struct {
+ uint64_t guard;
+ uint16_t apptag;
+ uint8_t sr[6];
+ } g64;
} NvmeDifTuple;
enum NvmeZoneAttr {
@@ -1510,7 +1560,8 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeZonedResult) != 8);
QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16);
- QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRange) != 32);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRangeFormat0) != 32);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRangeFormat1) != 40);
QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeDeleteQ) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeCreateCq) != 64);
@@ -1520,6 +1571,7 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeCopyCmd) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeHostBehaviorSupport) != 512);
QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512);
QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512);
@@ -1530,10 +1582,11 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeLBAF) != 4);
QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsNvm) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsZoned) != 4096);
QEMU_BUILD_BUG_ON(sizeof(NvmeSglDescriptor) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsDescr) != 4);
QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64);
- QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 8);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 16);
}
#endif
diff --git a/include/block/reqlist.h b/include/block/reqlist.h
new file mode 100644
index 0000000000..5253497bae
--- /dev/null
+++ b/include/block/reqlist.h
@@ -0,0 +1,75 @@
+/*
+ * reqlist API
+ *
+ * Copyright (C) 2013 Proxmox Server Solutions
+ * Copyright (c) 2021 Virtuozzo International GmbH.
+ *
+ * Authors:
+ * Dietmar Maurer (dietmar@proxmox.com)
+ * Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef REQLIST_H
+#define REQLIST_H
+
+#include "qemu/coroutine.h"
+
+/*
+ * The API is not thread-safe and shouldn't be. The struct is public to be part
+ * of other structures and protected by third-party locks, see
+ * block/block-copy.c for example.
+ */
+
+typedef struct BlockReq {
+ int64_t offset;
+ int64_t bytes;
+
+ CoQueue wait_queue; /* coroutines blocked on this req */
+ QLIST_ENTRY(BlockReq) list;
+} BlockReq;
+
+typedef QLIST_HEAD(, BlockReq) BlockReqList;
+
+/*
+ * Initialize new request and add it to the list. Caller must be sure that
+ * there are no conflicting requests in the list.
+ */
+void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset,
+ int64_t bytes);
+/* Search for request in the list intersecting with @offset/@bytes area. */
+BlockReq *reqlist_find_conflict(BlockReqList *reqs, int64_t offset,
+ int64_t bytes);
+
+/*
+ * If there are no intersecting requests return false. Otherwise, wait for the
+ * first found intersecting request to finish and return true.
+ *
+ * @lock is passed to qemu_co_queue_wait().
+ * A false return value guarantees that the lock was never released.
+ */
+bool coroutine_fn reqlist_wait_one(BlockReqList *reqs, int64_t offset,
+ int64_t bytes, CoMutex *lock);
+
+/*
+ * Wait for all intersecting requests. It just calls reqlist_wait_one() in a
+ * loop; the caller must stop producing new requests in this region in
+ * parallel, otherwise reqlist_wait_all() may never return.
+ */
+void coroutine_fn reqlist_wait_all(BlockReqList *reqs, int64_t offset,
+ int64_t bytes, CoMutex *lock);
+
+/*
+ * Shrink request and wake all waiting coroutines (maybe some of them are not
+ * intersecting with shrunk request).
+ */
+void coroutine_fn reqlist_shrink_req(BlockReq *req, int64_t new_bytes);
+
+/*
+ * Remove request and wake all waiting coroutines. Do not release any memory.
+ */
+void coroutine_fn reqlist_remove_req(BlockReq *req);
+
+#endif /* REQLIST_H */
diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index 940345692f..50ff924710 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -45,6 +45,13 @@ typedef struct QEMUSnapshotInfo {
uint64_t icount; /* record/replay step */
} QEMUSnapshotInfo;
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
const char *name);
bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
@@ -73,9 +80,11 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
Error **errp);
-/* Group operations. All block drivers are involved.
+/*
+ * Group operations. All block drivers are involved.
* These functions will properly handle dataplane (take aio_context_acquire
- * when appropriate for appropriate block drivers */
+ * when appropriate for appropriate block drivers)
+ */
bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
Error **errp);
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 84caf5c3d9..c0f0fab28a 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -433,10 +433,6 @@ int cpu_exec(CPUState *cpu);
void tcg_exec_realizefn(CPUState *cpu, Error **errp);
void tcg_exec_unrealizefn(CPUState *cpu);
-/* Returns: 0 on success, -1 on error */
-int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
- void *ptr, target_ulong len, bool is_write);
-
/**
* cpu_set_cpustate_pointers(cpu)
* @cpu: The cpu object
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index de5f444b19..7f7b5943c7 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -7,6 +7,18 @@
#include "exec/hwaddr.h"
#endif
+/**
+ * vaddr:
+ * Type wide enough to contain any #target_ulong virtual address.
+ */
+typedef uint64_t vaddr;
+#define VADDR_PRId PRId64
+#define VADDR_PRIu PRIu64
+#define VADDR_PRIo PRIo64
+#define VADDR_PRIx PRIx64
+#define VADDR_PRIX PRIX64
+#define VADDR_MAX UINT64_MAX
+
/* Using intptr_t ensures that qemu_*_page_mask is sign-extended even
* when intptr_t is 32-bit and we are aligning a long long.
*/
@@ -78,6 +90,28 @@ void qemu_ram_unset_migratable(RAMBlock *rb);
size_t qemu_ram_pagesize(RAMBlock *block);
size_t qemu_ram_pagesize_largest(void);
+/**
+ * cpu_address_space_init:
+ * @cpu: CPU to add this address space to
+ * @asidx: integer index of this address space
+ * @prefix: prefix to be used as name of address space
+ * @mr: the root memory region of address space
+ *
+ * Add the specified address space to the CPU's cpu_ases list.
+ * The address space added with @asidx 0 is the one used for the
+ * convenience pointer cpu->as.
+ * The target-specific code which registers ASes is responsible
+ * for defining what semantics address space 0, 1, 2, etc have.
+ *
+ * Before the first call to this function, the caller must set
+ * cpu->num_ases to the total number of address spaces it needs
+ * to support.
+ *
+ * Note that with KVM only one address space is supported.
+ */
+void cpu_address_space_init(CPUState *cpu, int asidx,
+ const char *prefix, MemoryRegion *mr);
+
void cpu_physical_memory_rw(hwaddr addr, void *buf,
hwaddr len, bool is_write);
static inline void cpu_physical_memory_read(hwaddr addr,
@@ -90,6 +124,7 @@ static inline void cpu_physical_memory_write(hwaddr addr,
{
cpu_physical_memory_rw(addr, (void *)buf, len, true);
}
+void cpu_reloading_memory_map(void);
void *cpu_physical_memory_map(hwaddr addr,
hwaddr *plen,
bool is_write);
@@ -116,6 +151,10 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
#endif
+/* Returns: 0 on success, -1 on error */
+int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
+ void *ptr, size_t len, bool is_write);
+
/* vl.c */
extern int singlestep;
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index da987fe8ad..6adacf8928 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -64,6 +64,7 @@
#include "exec/memopidx.h"
#include "qemu/int128.h"
+#include "cpu.h"
#if defined(CONFIG_USER_ONLY)
/* sparc32plus has 64bit long but 32bit space address
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 227e10ba56..d2cb0981f4 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -24,7 +24,6 @@
#ifdef CONFIG_TCG
#include "exec/cpu_ldst.h"
#endif
-#include "sysemu/cpu-timers.h"
/* allow to see translation results - the slowdown should be negligible, so we leave it */
#define DEBUG_DISAS
@@ -81,31 +80,6 @@ static inline bool cpu_loop_exit_requested(CPUState *cpu)
return (int32_t)qatomic_read(&cpu_neg(cpu)->icount_decr.u32) < 0;
}
-#if !defined(CONFIG_USER_ONLY)
-void cpu_reloading_memory_map(void);
-/**
- * cpu_address_space_init:
- * @cpu: CPU to add this address space to
- * @asidx: integer index of this address space
- * @prefix: prefix to be used as name of address space
- * @mr: the root memory region of address space
- *
- * Add the specified address space to the CPU's cpu_ases list.
- * The address space added with @asidx 0 is the one used for the
- * convenience pointer cpu->as.
- * The target-specific code which registers ASes is responsible
- * for defining what semantics address space 0, 1, 2, etc have.
- *
- * Before the first call to this function, the caller must set
- * cpu->num_ases to the total number of address spaces it needs
- * to support.
- *
- * Note that with KVM only one address space is supported.
- */
-void cpu_address_space_init(CPUState *cpu, int asidx,
- const char *prefix, MemoryRegion *mr);
-#endif
-
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
/* cputlb.c */
/**
diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h
index a024a0350d..89edf94d28 100644
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -45,17 +45,6 @@ void gdb_do_syscall(gdb_syscall_complete_cb cb, const char *fmt, ...);
*/
void gdb_do_syscallv(gdb_syscall_complete_cb cb, const char *fmt, va_list va);
int use_gdb_syscalls(void);
-void gdb_set_stop_cpu(CPUState *cpu);
-
-/**
- * gdb_exit: exit gdb session, reporting inferior status
- * @code: exit code reported
- *
- * This closes the session and sends a final packet to GDB reporting
- * the exit status of the program. It also cleans up any connections
- * detritus before returning.
- */
-void gdb_exit(int code);
#ifdef CONFIG_USER_ONLY
/**
@@ -165,7 +154,7 @@ static inline uint8_t * gdb_get_reg_ptr(GByteArray *buf, int len)
#define ldtul_p(addr) ldl_p(addr)
#endif
-#endif
+#endif /* NEED_CPU_H */
/**
* gdbserver_start: start the gdb server
@@ -178,6 +167,18 @@ static inline uint8_t * gdb_get_reg_ptr(GByteArray *buf, int len)
int gdbserver_start(const char *port_or_device);
/**
+ * gdb_exit: exit gdb session, reporting inferior status
+ * @code: exit code reported
+ *
+ * This closes the session and sends a final packet to GDB reporting
+ * the exit status of the program. It also cleans up any connections
+ * detritus before returning.
+ */
+void gdb_exit(int code);
+
+void gdb_set_stop_cpu(CPUState *cpu);
+
+/**
* gdb_has_xml:
* This is an ugly hack to cope with both new and old gdb.
* If gdb sends qXfer:features:read then assume we're talking to a newish
diff --git a/include/exec/poison.h b/include/exec/poison.h
index 7ad4ad18e8..7c5c02f03f 100644
--- a/include/exec/poison.h
+++ b/include/exec/poison.h
@@ -51,8 +51,6 @@
#pragma GCC poison TARGET_PAGE_BITS
#pragma GCC poison TARGET_PAGE_ALIGN
-#pragma GCC poison CPUArchState
-
#pragma GCC poison CPU_INTERRUPT_HARD
#pragma GCC poison CPU_INTERRUPT_EXITTB
#pragma GCC poison CPU_INTERRUPT_HALT
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index c1ea17d0de..7e76ee2619 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -132,6 +132,7 @@ struct VirtMachineClass {
bool no_secure_gpio;
/* Machines < 6.2 have no support for describing cpu topology to guest */
bool no_cpu_topology;
+ bool no_tcg_lpa2;
};
struct VirtMachineState {
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 76ab3b851c..0efc6153ed 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -22,6 +22,7 @@
#include "hw/qdev-core.h"
#include "disas/dis-asm.h"
+#include "exec/cpu-common.h"
#include "exec/hwaddr.h"
#include "exec/memattrs.h"
#include "qapi/qapi-types-run-state.h"
@@ -36,18 +37,6 @@ typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
void *opaque);
/**
- * vaddr:
- * Type wide enough to contain any #target_ulong virtual address.
- */
-typedef uint64_t vaddr;
-#define VADDR_PRId PRId64
-#define VADDR_PRIu PRIu64
-#define VADDR_PRIo PRIo64
-#define VADDR_PRIx PRIx64
-#define VADDR_PRIX PRIX64
-#define VADDR_MAX UINT64_MAX
-
-/**
* SECTION:cpu
* @section_id: QEMU-cpu
* @title: CPU Class
@@ -66,6 +55,24 @@ typedef struct CPUClass CPUClass;
DECLARE_CLASS_CHECKERS(CPUClass, CPU,
TYPE_CPU)
+/**
+ * OBJECT_DECLARE_CPU_TYPE:
+ * @CpuInstanceType: instance struct name
+ * @CpuClassType: class struct name
+ * @CPU_MODULE_OBJ_NAME: the CPU name in uppercase with underscore separators
+ *
+ * This macro is typically used in "cpu-qom.h" header file, and will:
+ *
+ * - create the typedefs for the CPU object and class structs
+ * - register the type for use with g_autoptr
+ * - provide three standard type cast functions
+ *
+ * The object struct and class struct need to be declared manually.
+ */
+#define OBJECT_DECLARE_CPU_TYPE(CpuInstanceType, CpuClassType, CPU_MODULE_OBJ_NAME) \
+ typedef struct ArchCPU CpuInstanceType; \
+ OBJECT_DECLARE_TYPE(ArchCPU, CpuClassType, CPU_MODULE_OBJ_NAME);
+
typedef enum MMUAccessType {
MMU_DATA_LOAD = 0,
MMU_DATA_STORE = 1,
@@ -351,7 +358,7 @@ struct CPUState {
AddressSpace *as;
MemoryRegion *memory;
- void *env_ptr; /* CPUArchState */
+ CPUArchState *env_ptr;
IcountDecr *icount_decr_ptr;
/* Accessed in parallel; all accesses must be atomic */
diff --git a/include/hw/intc/riscv_imsic.h b/include/hw/intc/riscv_imsic.h
new file mode 100644
index 0000000000..58c2aaa8dc
--- /dev/null
+++ b/include/hw/intc/riscv_imsic.h
@@ -0,0 +1,68 @@
+/*
+ * RISC-V IMSIC (Incoming Message Signal Interrupt Controller) interface
+ *
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_IMSIC_H
+#define HW_RISCV_IMSIC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define TYPE_RISCV_IMSIC "riscv.imsic"
+
+typedef struct RISCVIMSICState RISCVIMSICState;
+DECLARE_INSTANCE_CHECKER(RISCVIMSICState, RISCV_IMSIC, TYPE_RISCV_IMSIC)
+
+#define IMSIC_MMIO_PAGE_SHIFT 12
+#define IMSIC_MMIO_PAGE_SZ (1UL << IMSIC_MMIO_PAGE_SHIFT)
+#define IMSIC_MMIO_SIZE(__num_pages) ((__num_pages) * IMSIC_MMIO_PAGE_SZ)
+
+#define IMSIC_MMIO_HART_GUEST_MAX_BTIS 6
+#define IMSIC_MMIO_GROUP_MIN_SHIFT 24
+
+#define IMSIC_HART_NUM_GUESTS(__guest_bits) \
+ (1U << (__guest_bits))
+#define IMSIC_HART_SIZE(__guest_bits) \
+ (IMSIC_HART_NUM_GUESTS(__guest_bits) * IMSIC_MMIO_PAGE_SZ)
+#define IMSIC_GROUP_NUM_HARTS(__hart_bits) \
+ (1U << (__hart_bits))
+#define IMSIC_GROUP_SIZE(__hart_bits, __guest_bits) \
+ (IMSIC_GROUP_NUM_HARTS(__hart_bits) * IMSIC_HART_SIZE(__guest_bits))
+
+struct RISCVIMSICState {
+ /*< private >*/
+ SysBusDevice parent_obj;
+ qemu_irq *external_irqs;
+
+ /*< public >*/
+ MemoryRegion mmio;
+ uint32_t num_eistate;
+ uint32_t *eidelivery;
+ uint32_t *eithreshold;
+ uint32_t *eistate;
+
+ /* config */
+ bool mmode;
+ uint32_t hartid;
+ uint32_t num_pages;
+ uint32_t num_irqs;
+};
+
+DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode,
+ uint32_t num_pages, uint32_t num_ids);
+
+#endif
diff --git a/include/hw/riscv/opentitan.h b/include/hw/riscv/opentitan.h
index eac35ef590..00da9ded43 100644
--- a/include/hw/riscv/opentitan.h
+++ b/include/hw/riscv/opentitan.h
@@ -57,8 +57,10 @@ enum {
IBEX_DEV_FLASH,
IBEX_DEV_FLASH_VIRTUAL,
IBEX_DEV_UART,
+ IBEX_DEV_SPI_DEVICE,
+ IBEX_DEV_SPI_HOST0,
+ IBEX_DEV_SPI_HOST1,
IBEX_DEV_GPIO,
- IBEX_DEV_SPI,
IBEX_DEV_I2C,
IBEX_DEV_PATTGEN,
IBEX_DEV_TIMER,
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index 6e9f61ccd9..78b058ec86 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -24,26 +24,36 @@
#include "hw/block/flash.h"
#include "qom/object.h"
-#define VIRT_CPUS_MAX 32
-#define VIRT_SOCKETS_MAX 8
+#define VIRT_CPUS_MAX_BITS 9
+#define VIRT_CPUS_MAX (1 << VIRT_CPUS_MAX_BITS)
+#define VIRT_SOCKETS_MAX_BITS 2
+#define VIRT_SOCKETS_MAX (1 << VIRT_SOCKETS_MAX_BITS)
#define TYPE_RISCV_VIRT_MACHINE MACHINE_TYPE_NAME("virt")
typedef struct RISCVVirtState RISCVVirtState;
DECLARE_INSTANCE_CHECKER(RISCVVirtState, RISCV_VIRT_MACHINE,
TYPE_RISCV_VIRT_MACHINE)
+typedef enum RISCVVirtAIAType {
+ VIRT_AIA_TYPE_NONE = 0,
+ VIRT_AIA_TYPE_APLIC,
+ VIRT_AIA_TYPE_APLIC_IMSIC,
+} RISCVVirtAIAType;
+
struct RISCVVirtState {
/*< private >*/
MachineState parent;
/*< public >*/
RISCVHartArrayState soc[VIRT_SOCKETS_MAX];
- DeviceState *plic[VIRT_SOCKETS_MAX];
+ DeviceState *irqchip[VIRT_SOCKETS_MAX];
PFlashCFI01 *flash[2];
FWCfgState *fw_cfg;
int fdt_size;
bool have_aclint;
+ RISCVVirtAIAType aia_type;
+ int aia_guests;
};
enum {
@@ -54,9 +64,13 @@ enum {
VIRT_CLINT,
VIRT_ACLINT_SSWI,
VIRT_PLIC,
+ VIRT_APLIC_M,
+ VIRT_APLIC_S,
VIRT_UART0,
VIRT_VIRTIO,
VIRT_FW_CFG,
+ VIRT_IMSIC_M,
+ VIRT_IMSIC_S,
VIRT_FLASH,
VIRT_DRAM,
VIRT_PCIE_MMIO,
@@ -73,8 +87,13 @@ enum {
VIRTIO_NDEV = 0x35 /* Arbitrary maximum number of interrupts */
};
-#define VIRT_PLIC_NUM_SOURCES 127
-#define VIRT_PLIC_NUM_PRIORITIES 7
+#define VIRT_IRQCHIP_IPI_MSI 1
+#define VIRT_IRQCHIP_NUM_MSIS 255
+#define VIRT_IRQCHIP_NUM_SOURCES VIRTIO_NDEV
+#define VIRT_IRQCHIP_NUM_PRIO_BITS 3
+#define VIRT_IRQCHIP_MAX_GUESTS_BITS 3
+#define VIRT_IRQCHIP_MAX_GUESTS ((1U << VIRT_IRQCHIP_MAX_GUESTS_BITS) - 1U)
+
#define VIRT_PLIC_PRIORITY_BASE 0x04
#define VIRT_PLIC_PENDING_BASE 0x1000
#define VIRT_PLIC_ENABLE_BASE 0x2000
@@ -86,9 +105,15 @@ enum {
#define FDT_PCI_ADDR_CELLS 3
#define FDT_PCI_INT_CELLS 1
-#define FDT_PLIC_ADDR_CELLS 0
#define FDT_PLIC_INT_CELLS 1
-#define FDT_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + 1 + \
- FDT_PLIC_ADDR_CELLS + FDT_PLIC_INT_CELLS)
+#define FDT_APLIC_INT_CELLS 2
+#define FDT_IMSIC_INT_CELLS 0
+#define FDT_MAX_INT_CELLS 2
+#define FDT_MAX_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
+ 1 + FDT_MAX_INT_CELLS)
+#define FDT_PLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
+ 1 + FDT_PLIC_INT_CELLS)
+#define FDT_APLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \
+ 1 + FDT_APLIC_INT_CELLS)
#endif
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 68b2e3bc10..8c0d9ab0f7 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -26,8 +26,6 @@
int qemu_main(int argc, char **argv, char **envp);
#endif
-void *qemu_oom_check(void *ptr);
-
ssize_t qemu_write_full(int fd, const void *buf, size_t count)
QEMU_WARN_UNUSED_RESULT;
diff --git a/include/qemu/coroutine-tls.h b/include/qemu/coroutine-tls.h
new file mode 100644
index 0000000000..1558a826aa
--- /dev/null
+++ b/include/qemu/coroutine-tls.h
@@ -0,0 +1,165 @@
+/*
+ * QEMU Thread Local Storage for coroutines
+ *
+ * Copyright Red Hat
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ * It is forbidden to access Thread Local Storage in coroutines because
+ * compiler optimizations may cause values to be cached across coroutine
+ * re-entry. Coroutines can run in more than one thread through the course of
+ * their life, leading to bugs when stale TLS values from the wrong thread are
+ * used as a result of compiler optimization.
+ *
+ * An example is:
+ *
+ * .. code-block:: c
+ * :caption: A coroutine that may see the wrong TLS value
+ *
+ * static __thread AioContext *current_aio_context;
+ * ...
+ * static void coroutine_fn foo(void)
+ * {
+ * aio_notify(current_aio_context);
+ * qemu_coroutine_yield();
+ * aio_notify(current_aio_context); // <-- may be stale after yielding!
+ * }
+ *
+ * This header provides macros for safely defining variables in Thread Local
+ * Storage:
+ *
+ * .. code-block:: c
+ * :caption: A coroutine that safely uses TLS
+ *
+ * QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context)
+ * ...
+ * static void coroutine_fn foo(void)
+ * {
+ * aio_notify(get_current_aio_context());
+ * qemu_coroutine_yield();
+ * aio_notify(get_current_aio_context()); // <-- safe
+ * }
+ */
+
+#ifndef QEMU_COROUTINE_TLS_H
+#define QEMU_COROUTINE_TLS_H
+
+/*
+ * To stop the compiler from caching TLS values we define accessor functions
+ * with __attribute__((noinline)) plus asm volatile("") to prevent
+ * optimizations that override noinline.
+ *
+ * The compiler can still analyze noinline code and make optimizations based on
+ * that knowledge, so an inline asm output operand is used to prevent
+ * optimizations that make assumptions about the address of the TLS variable.
+ *
+ * This is fragile and ultimately needs to be solved by a mechanism that is
+ * guaranteed to work by the compiler (e.g. stackless coroutines), but for now
+ * we use this approach to prevent issues.
+ */
+
+/**
+ * QEMU_DECLARE_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Declare an extern variable in Thread Local Storage from a header file:
+ *
+ * .. code-block:: c
+ * :caption: Declaring an extern variable in Thread Local Storage
+ *
+ * QEMU_DECLARE_CO_TLS(int, my_count)
+ * ...
+ * int c = get_my_count();
+ * set_my_count(c + 1);
+ * *get_ptr_my_count() = 0;
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ * :caption: Declaring a TLS variable using __thread
+ *
+ * extern __thread int my_count;
+ * ...
+ * int c = my_count;
+ * my_count = c + 1;
+ * *(&my_count) = 0;
+ */
+#define QEMU_DECLARE_CO_TLS(type, var) \
+ __attribute__((noinline)) type get_##var(void); \
+ __attribute__((noinline)) void set_##var(type v); \
+ __attribute__((noinline)) type *get_ptr_##var(void);
+
+/**
+ * QEMU_DEFINE_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Define a variable in Thread Local Storage that was previously declared from
+ * a header file with QEMU_DECLARE_CO_TLS():
+ *
+ * .. code-block:: c
+ * :caption: Defining a variable in Thread Local Storage
+ *
+ * QEMU_DEFINE_CO_TLS(int, my_count)
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ * :caption: Defining a TLS variable using __thread
+ *
+ * __thread int my_count;
+ */
+#define QEMU_DEFINE_CO_TLS(type, var) \
+ static __thread type co_tls_##var; \
+ type get_##var(void) { asm volatile(""); return co_tls_##var; } \
+ void set_##var(type v) { asm volatile(""); co_tls_##var = v; } \
+ type *get_ptr_##var(void) \
+ { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }
+
+/**
+ * QEMU_DEFINE_STATIC_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Define a static variable in Thread Local Storage:
+ *
+ * .. code-block:: c
+ * :caption: Defining a static variable in Thread Local Storage
+ *
+ * QEMU_DEFINE_STATIC_CO_TLS(int, my_count)
+ * ...
+ * int c = get_my_count();
+ * set_my_count(c + 1);
+ * *get_ptr_my_count() = 0;
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ * :caption: Defining a static TLS variable using __thread
+ *
+ * static __thread int my_count;
+ * ...
+ * int c = my_count;
+ * my_count = c + 1;
+ * *(&my_count) = 0;
+ */
+#define QEMU_DEFINE_STATIC_CO_TLS(type, var) \
+ static __thread type co_tls_##var; \
+ static __attribute__((noinline, unused)) \
+ type get_##var(void) \
+ { asm volatile(""); return co_tls_##var; } \
+ static __attribute__((noinline, unused)) \
+ void set_##var(type v) \
+ { asm volatile(""); co_tls_##var = v; } \
+ static __attribute__((noinline, unused)) \
+ type *get_ptr_##var(void) \
+ { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }
+
+#endif /* QEMU_COROUTINE_TLS_H */
diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
index 09fc245b91..7adb12d320 100644
--- a/include/qemu/cpuid.h
+++ b/include/qemu/cpuid.h
@@ -45,12 +45,26 @@
#ifndef bit_AVX2
#define bit_AVX2 (1 << 5)
#endif
-#ifndef bit_AVX512F
-#define bit_AVX512F (1 << 16)
-#endif
#ifndef bit_BMI2
#define bit_BMI2 (1 << 8)
#endif
+#ifndef bit_AVX512F
+#define bit_AVX512F (1 << 16)
+#endif
+#ifndef bit_AVX512DQ
+#define bit_AVX512DQ (1 << 17)
+#endif
+#ifndef bit_AVX512BW
+#define bit_AVX512BW (1 << 30)
+#endif
+#ifndef bit_AVX512VL
+#define bit_AVX512VL (1u << 31)
+#endif
+
+/* Leaf 7, %ecx */
+#ifndef bit_AVX512VBMI2
+#define bit_AVX512VBMI2 (1 << 6)
+#endif
/* Leaf 0x80000001, %ecx */
#ifndef bit_LZCNT
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 5e71b6d6f7..5bd986aa44 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -340,6 +340,18 @@ bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t start, int64_t end,
int64_t max_dirty_count,
int64_t *dirty_start, int64_t *dirty_count);
+/*
+ * hbitmap_status:
+ * @hb: The HBitmap to operate on
+ * @start: The bit to start from
+ * @count: Number of bits to process
+ * @pnum: Out-parameter. How many bits have the same value starting from @start
+ *
+ * Returns true if bitmap is dirty at @start, false otherwise.
+ */
+bool hbitmap_status(const HBitmap *hb, int64_t start, int64_t count,
+ int64_t *pnum);
+
/**
* hbitmap_iter_next:
* @hbi: HBitmapIter to operate on.
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 6e67b6977f..c105b31076 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -169,6 +169,12 @@ typedef struct Job {
* Callbacks and other information about a Job driver.
*/
struct JobDriver {
+
+ /*
+ * These fields are initialized when this object is created,
+ * and are never changed afterwards
+ */
+
/** Derived Job struct size */
size_t instance_size;
@@ -184,9 +190,18 @@ struct JobDriver {
* aborted. If it returns zero, the job moves into the WAITING state. If it
* is the last job to complete in its transaction, all jobs in the
* transaction move from WAITING to PENDING.
+ *
+ * This callback must be run in the job's context.
*/
int coroutine_fn (*run)(Job *job, Error **errp);
+ /*
+ * Functions run without regard to the BQL that may run in any
+ * arbitrary thread. These functions do not need to be thread-safe
+ * because the caller ensures that they are invoked from one
+ * thread at a time.
+ */
+
/**
* If the callback is not NULL, it will be invoked when the job transitions
* into the paused state. Paused jobs must not perform any asynchronous
@@ -201,6 +216,13 @@ struct JobDriver {
*/
void coroutine_fn (*resume)(Job *job);
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
/**
* Called when the job is resumed by the user (i.e. user_paused becomes
* false). .user_resume is called before .resume.
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 8dbc6fcb89..7a4d6a0920 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -242,10 +242,52 @@ AioContext *iohandler_get_aio_context(void);
* must always be taken outside other locks. This function helps
* functions take different paths depending on whether the current
* thread is running within the main loop mutex.
+ *
+ * This function should never be used in the block layer, because
+ * unit tests, block layer tools and qemu-storage-daemon do not
+ * have a BQL.
+ * Please instead refer to qemu_in_main_thread().
*/
bool qemu_mutex_iothread_locked(void);
/**
+ * qemu_in_main_thread: return whether it's possible to safely access
+ * the global state of the block layer.
+ *
+ * Global state of the block layer is not accessible from I/O threads
+ * or worker threads; only from threads that "own" the default
+ * AioContext that qemu_get_aio_context() returns. For tests, block
+ * layer tools and qemu-storage-daemon there is a designated thread that
+ * runs the event loop for qemu_get_aio_context(), and that is the
+ * main thread.
+ *
+ * For emulators, however, any thread that holds the BQL can act
+ * as the block layer main thread; this will be any of the actual
+ * main thread, the vCPU threads or the RCU thread.
+ *
+ * For clarity, do not use this function outside the block layer.
+ */
+bool qemu_in_main_thread(void);
+
+/* Mark and check that the function is part of the global state API. */
+#define GLOBAL_STATE_CODE() \
+ do { \
+ assert(qemu_in_main_thread()); \
+ } while (0)
+
+/* Mark and check that the function is part of the I/O API. */
+#define IO_CODE() \
+ do { \
+ /* nop */ \
+ } while (0)
+
+/* Mark and check that the function is part of the "I/O OR GS" API. */
+#define IO_OR_GS_CODE() \
+ do { \
+ /* nop */ \
+ } while (0)
+
+/**
* qemu_mutex_lock_iothread: Lock the main loop mutex.
*
* This function locks the main loop mutex. The mutex is taken by
diff --git a/include/qemu/memalign.h b/include/qemu/memalign.h
new file mode 100644
index 0000000000..fa299f3bf6
--- /dev/null
+++ b/include/qemu/memalign.h
@@ -0,0 +1,61 @@
+/*
+ * Allocation and free functions for aligned memory
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_MEMALIGN_H
+#define QEMU_MEMALIGN_H
+
+/**
+ * qemu_try_memalign: Allocate aligned memory
+ * @alignment: required alignment, in bytes
+ * @size: size of allocation, in bytes
+ *
+ * Allocate memory on an aligned boundary (i.e. the returned
+ * address will be an exact multiple of @alignment).
+ * @alignment must be a power of 2, or the function will assert().
+ * On success, returns allocated memory; on failure, returns NULL.
+ *
+ * The memory allocated through this function must be freed via
+ * qemu_vfree() (and not via free()).
+ */
+void *qemu_try_memalign(size_t alignment, size_t size);
+/**
+ * qemu_memalign: Allocate aligned memory, without failing
+ * @alignment: required alignment, in bytes
+ * @size: size of allocation, in bytes
+ *
+ * Allocate memory in the same way as qemu_try_memalign(), but
+ * abort() with an error message if the memory allocation fails.
+ *
+ * The memory allocated through this function must be freed via
+ * qemu_vfree() (and not via free()).
+ */
+void *qemu_memalign(size_t alignment, size_t size);
+/**
+ * qemu_vfree: Free memory allocated through qemu_memalign
+ * @ptr: memory to free
+ *
+ * This function must be used to free memory allocated via qemu_memalign()
+ * or qemu_try_memalign(). (Using the wrong free function will cause
+ * subtle bugs on Windows hosts.)
+ */
+void qemu_vfree(void *ptr);
+/*
+ * It's an analog of GLIB's g_autoptr_cleanup_generic_gfree(), used to define
+ * g_autofree macro.
+ */
+static inline void qemu_cleanup_generic_vfree(void *p)
+{
+ void **pp = (void **)p;
+ qemu_vfree(*pp);
+}
+
+/*
+ * Analog of g_autofree, but qemu_vfree is called on cleanup instead of g_free.
+ */
+#define QEMU_AUTO_VFREE __attribute__((cleanup(qemu_cleanup_generic_vfree)))
+
+#endif
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 7bcce3bceb..c9ec7830c9 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -379,28 +379,10 @@ extern "C" {
#endif
int qemu_daemon(int nochdir, int noclose);
-void *qemu_try_memalign(size_t alignment, size_t size);
-void *qemu_memalign(size_t alignment, size_t size);
void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
bool noreserve);
-void qemu_vfree(void *ptr);
void qemu_anon_ram_free(void *ptr, size_t size);
-/*
- * It's an analog of GLIB's g_autoptr_cleanup_generic_gfree(), used to define
- * g_autofree macro.
- */
-static inline void qemu_cleanup_generic_vfree(void *p)
-{
- void **pp = (void **)p;
- qemu_vfree(*pp);
-}
-
-/*
- * Analog of g_autofree, but qemu_vfree is called on cleanup instead of g_free.
- */
-#define QEMU_AUTO_VFREE __attribute__((cleanup(qemu_cleanup_generic_vfree)))
-
#ifdef _WIN32
#define HAVE_CHARDEV_SERIAL 1
#elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \
@@ -673,19 +655,6 @@ static inline int platform_does_not_support_system(const char *command)
}
#endif /* !HAVE_SYSTEM_FUNCTION */
-/**
- * Duplicate directory entry @dent.
- *
- * It is highly recommended to use this function instead of open coding
- * duplication of @c dirent objects, because the actual @c struct @c dirent
- * size may be bigger or shorter than @c sizeof(struct dirent) and correct
- * handling is platform specific (see gitlab issue #841).
- *
- * @dent - original directory entry to be duplicated
- * @returns duplicated directory entry which should be freed with g_free()
- */
-struct dirent *qemu_dirent_dup(struct dirent *dent);
-
#ifdef __cplusplus
}
#endif
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index e69efbd47f..b063c6fde8 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -29,6 +29,7 @@
#include "qemu/atomic.h"
#include "qemu/notify.h"
#include "qemu/sys_membarrier.h"
+#include "qemu/coroutine-tls.h"
#ifdef __cplusplus
extern "C" {
@@ -76,11 +77,11 @@ struct rcu_reader_data {
NotifierList force_rcu;
};
-extern __thread struct rcu_reader_data rcu_reader;
+QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader)
static inline void rcu_read_lock(void)
{
- struct rcu_reader_data *p_rcu_reader = &rcu_reader;
+ struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
unsigned ctr;
if (p_rcu_reader->depth++ > 0) {
@@ -96,7 +97,7 @@ static inline void rcu_read_lock(void)
static inline void rcu_read_unlock(void)
{
- struct rcu_reader_data *p_rcu_reader = &rcu_reader;
+ struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
assert(p_rcu_reader->depth != 0);
if (--p_rcu_reader->depth > 0) {
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 5b302cb214..42f4ceb701 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -26,6 +26,7 @@ typedef struct AddressSpace AddressSpace;
typedef struct AioContext AioContext;
typedef struct Aml Aml;
typedef struct AnnounceTimer AnnounceTimer;
+typedef struct ArchCPU ArchCPU;
typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
typedef struct BdrvDirtyBitmapIter BdrvDirtyBitmapIter;
typedef struct BlockBackend BlockBackend;
@@ -39,6 +40,7 @@ typedef struct CompatProperty CompatProperty;
typedef struct CoMutex CoMutex;
typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
typedef struct CPUAddressSpace CPUAddressSpace;
+typedef struct CPUArchState CPUArchState;
typedef struct CPUState CPUState;
typedef struct DeviceListener DeviceListener;
typedef struct DeviceState DeviceState;
diff --git a/include/qemu/xattr.h b/include/qemu/xattr.h
index a83fe8e749..f1d0f7be74 100644
--- a/include/qemu/xattr.h
+++ b/include/qemu/xattr.h
@@ -22,7 +22,9 @@
#ifdef CONFIG_LIBATTR
# include <attr/xattr.h>
#else
-# define ENOATTR ENODATA
+# if !defined(ENOATTR)
+# define ENOATTR ENODATA
+# endif
# include <sys/xattr.h>
#endif
diff --git a/include/sysemu/accel-ops.h b/include/sysemu/accel-ops.h
index 032f6979d7..6013c9444c 100644
--- a/include/sysemu/accel-ops.h
+++ b/include/sysemu/accel-ops.h
@@ -28,8 +28,11 @@ struct AccelOpsClass {
/* initialization function called when accel is chosen */
void (*ops_init)(AccelOpsClass *ops);
+ bool (*cpus_are_resettable)(void);
+
void (*create_vcpu_thread)(CPUState *cpu); /* MANDATORY NON-NULL */
void (*kick_vcpu_thread)(CPUState *cpu);
+ bool (*cpu_thread_is_idle)(CPUState *cpu);
void (*synchronize_post_reset)(CPUState *cpu);
void (*synchronize_post_init)(CPUState *cpu);
diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h
index 70c579560a..79c2591425 100644
--- a/include/sysemu/arch_init.h
+++ b/include/sysemu/arch_init.h
@@ -28,4 +28,6 @@ enum {
extern const uint32_t arch_type;
+void qemu_init_arch_modules(void);
+
#endif
diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h
new file mode 100644
index 0000000000..2391679c56
--- /dev/null
+++ b/include/sysemu/block-backend-common.h
@@ -0,0 +1,102 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later. See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_COMMON_H
+#define BLOCK_BACKEND_COMMON_H
+
+#include "qemu/iov.h"
+#include "block/throttle-groups.h"
+
+/*
+ * TODO Have to include block/block.h for a bunch of block layer
+ * types. Unfortunately, this pulls in the whole BlockDriverState
+ * API, which we don't want used by many BlockBackend users. Some of
+ * the types belong here, and the rest should be split into a common
+ * header and one for the BlockDriverState API.
+ */
+#include "block/block.h"
+
+/* Callbacks for block device models */
+typedef struct BlockDevOps {
+
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+ /*
+ * Runs when virtual media changed (monitor commands eject, change)
+ * Argument load is true on load and false on eject.
+ * Beware: doesn't run when a host device's physical media
+ * changes. Sure would be useful if it did.
+ * Device models with removable media must implement this callback.
+ */
+ void (*change_media_cb)(void *opaque, bool load, Error **errp);
+ /*
+ * Runs when an eject request is issued from the monitor, the tray
+ * is closed, and the medium is locked.
+ * Device models that do not implement is_medium_locked will not need
+ * this callback. Device models that can lock the medium or tray might
+ * want to implement the callback and unlock the tray when "force" is
+ * true, even if they do not support eject requests.
+ */
+ void (*eject_request_cb)(void *opaque, bool force);
+
+ /*
+ * Is the virtual medium locked into the device?
+ * Device models implement this only when device has such a lock.
+ */
+ bool (*is_medium_locked)(void *opaque);
+
+ /*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+ /*
+ * Is the virtual tray open?
+ * Device models implement this only when the device has a tray.
+ */
+ bool (*is_tray_open)(void *opaque);
+
+ /*
+ * Runs when the size changed (e.g. monitor command block_resize)
+ */
+ void (*resize_cb)(void *opaque);
+ /*
+ * Runs when the backend receives a drain request.
+ */
+ void (*drained_begin)(void *opaque);
+ /*
+ * Runs when the backend's last drain request ends.
+ */
+ void (*drained_end)(void *opaque);
+ /*
+ * Is the device still busy?
+ */
+ bool (*drained_poll)(void *opaque);
+} BlockDevOps;
+
+/*
+ * This struct is embedded in (the private) BlockBackend struct and contains
+ * fields that must be public. This is in particular for QLIST_ENTRY() and
+ * friends so that BlockBackends can be kept in lists outside block-backend.c
+ */
+typedef struct BlockBackendPublic {
+ ThrottleGroupMember throttle_group_member;
+} BlockBackendPublic;
+
+#endif /* BLOCK_BACKEND_COMMON_H */
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
new file mode 100644
index 0000000000..2e93a74679
--- /dev/null
+++ b/include/sysemu/block-backend-global-state.h
@@ -0,0 +1,116 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later. See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_GS_H
+#define BLOCK_BACKEND_GS_H
+
+#include "block-backend-common.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
+BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
+ uint64_t shared_perm, Error **errp);
+BlockBackend *blk_new_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp);
+int blk_get_refcnt(BlockBackend *blk);
+void blk_ref(BlockBackend *blk);
+void blk_unref(BlockBackend *blk);
+void blk_remove_all_bs(void);
+BlockBackend *blk_by_name(const char *name);
+BlockBackend *blk_next(BlockBackend *blk);
+BlockBackend *blk_all_next(BlockBackend *blk);
+bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);
+void monitor_remove_blk(BlockBackend *blk);
+
+BlockBackendPublic *blk_get_public(BlockBackend *blk);
+BlockBackend *blk_by_public(BlockBackendPublic *public);
+
+void blk_remove_bs(BlockBackend *blk);
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
+int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
+bool bdrv_has_blk(BlockDriverState *bs);
+bool bdrv_is_root_node(BlockDriverState *bs);
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp);
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
+
+void blk_iostatus_enable(BlockBackend *blk);
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
+void blk_iostatus_disable(BlockBackend *blk);
+void blk_iostatus_reset(BlockBackend *blk);
+int blk_attach_dev(BlockBackend *blk, DeviceState *dev);
+void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
+DeviceState *blk_get_attached_dev(BlockBackend *blk);
+BlockBackend *blk_by_dev(void *dev);
+BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
+void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
+
+void blk_activate(BlockBackend *blk, Error **errp);
+
+int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
+void blk_aio_cancel(BlockAIOCB *acb);
+int blk_commit_all(void);
+void blk_drain(BlockBackend *blk);
+void blk_drain_all(void);
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error);
+bool blk_supports_write_perm(BlockBackend *blk);
+bool blk_is_sg(BlockBackend *blk);
+void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
+int blk_get_flags(BlockBackend *blk);
+bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
+void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
+void blk_op_block_all(BlockBackend *blk, Error *reason);
+void blk_op_unblock_all(BlockBackend *blk, Error *reason);
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+ Error **errp);
+void blk_add_aio_context_notifier(BlockBackend *blk,
+ void (*attached_aio_context)(AioContext *new_context, void *opaque),
+ void (*detach_aio_context)(void *opaque), void *opaque);
+void blk_remove_aio_context_notifier(BlockBackend *blk,
+ void (*attached_aio_context)(AioContext *,
+ void *),
+ void (*detach_aio_context)(void *),
+ void *opaque);
+void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify);
+void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify);
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
+void blk_update_root_state(BlockBackend *blk);
+bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
+int blk_get_open_flags_from_root_state(BlockBackend *blk);
+
+int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
+ int64_t pos, int size);
+int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size);
+int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz);
+int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo);
+
+void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg);
+void blk_io_limits_disable(BlockBackend *blk);
+void blk_io_limits_enable(BlockBackend *blk, const char *group);
+void blk_io_limits_update_group(BlockBackend *blk, const char *group);
+void blk_set_force_allow_inactivate(BlockBackend *blk);
+
+void blk_register_buf(BlockBackend *blk, void *host, size_t size);
+void blk_unregister_buf(BlockBackend *blk, void *host);
+
+const BdrvChild *blk_root(BlockBackend *blk);
+
+int blk_make_empty(BlockBackend *blk, Error **errp);
+
+#endif /* BLOCK_BACKEND_GS_H */
diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
new file mode 100644
index 0000000000..6517c39295
--- /dev/null
+++ b/include/sysemu/block-backend-io.h
@@ -0,0 +1,161 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later. See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_IO_H
+#define BLOCK_BACKEND_IO_H
+
+#include "block-backend-common.h"
+
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+const char *blk_name(const BlockBackend *blk);
+
+BlockDriverState *blk_bs(BlockBackend *blk);
+
+void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
+void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
+bool blk_iostatus_is_enabled(const BlockBackend *blk);
+
+char *blk_get_attached_dev_id(BlockBackend *blk);
+
+BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque);
+
+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_flush(BlockBackend *blk,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes,
+ BlockCompletionFunc *cb, void *opaque);
+void blk_aio_cancel_async(BlockAIOCB *acb);
+BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
+ BlockCompletionFunc *cb, void *opaque);
+
+void blk_inc_in_flight(BlockBackend *blk);
+void blk_dec_in_flight(BlockBackend *blk);
+bool blk_is_inserted(BlockBackend *blk);
+bool blk_is_available(BlockBackend *blk);
+void blk_lock_medium(BlockBackend *blk, bool locked);
+void blk_eject(BlockBackend *blk, bool eject_flag);
+int64_t blk_getlength(BlockBackend *blk);
+void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
+int64_t blk_nb_sectors(BlockBackend *blk);
+void *blk_try_blockalign(BlockBackend *blk, size_t size);
+void *blk_blockalign(BlockBackend *blk, size_t size);
+bool blk_is_writable(BlockBackend *blk);
+bool blk_enable_write_cache(BlockBackend *blk);
+BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
+BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
+ int error);
+void blk_error_action(BlockBackend *blk, BlockErrorAction action,
+ bool is_read, int error);
+void blk_iostatus_set_err(BlockBackend *blk, int error);
+int blk_get_max_iov(BlockBackend *blk);
+int blk_get_max_hw_iov(BlockBackend *blk);
+void blk_set_guest_block_size(BlockBackend *blk, int align);
+
+void blk_io_plug(BlockBackend *blk);
+void blk_io_unplug(BlockBackend *blk);
+AioContext *blk_get_aio_context(BlockBackend *blk);
+BlockAcctStats *blk_get_stats(BlockBackend *blk);
+void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
+ BlockCompletionFunc *cb,
+ void *opaque, int ret);
+
+uint32_t blk_get_request_alignment(BlockBackend *blk);
+uint32_t blk_get_max_transfer(BlockBackend *blk);
+uint64_t blk_get_max_hw_transfer(BlockBackend *blk);
+
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
+ BlockBackend *blk_out, int64_t off_out,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * See include/block/block-io.h for more information about
+ * the "I/O or GS" API.
+ */
+
+int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes);
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
+ BdrvRequestFlags flags);
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+ int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
+ int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+ int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+
+/*
+ * Convenience wrapper around blk_co_preadv() for a single contiguous buffer.
+ *
+ * Builds a local QEMUIOVector from @buf/@bytes with QEMU_IOVEC_INIT_BUF and
+ * forwards @blk, @offset, @bytes and @flags to blk_co_preadv(), returning its
+ * result unchanged. Asserts that @bytes does not exceed SIZE_MAX.
+ */
+static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset,
+ int64_t bytes, void *buf,
+ BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
+
+ assert(bytes <= SIZE_MAX);
+
+ return blk_co_preadv(blk, offset, bytes, &qiov, flags);
+}
+
+/*
+ * Convenience wrapper around blk_co_pwritev() for a single contiguous buffer.
+ *
+ * Builds a local QEMUIOVector from @buf/@bytes with QEMU_IOVEC_INIT_BUF and
+ * forwards @blk, @offset, @bytes and @flags to blk_co_pwritev(), returning
+ * its result unchanged. Asserts that @bytes does not exceed SIZE_MAX.
+ *
+ * @buf is const-qualified, matching blk_pwrite(): the data is only read.
+ * NOTE(review): relies on QEMU_IOVEC_INIT_BUF casting @buf to void * when it
+ * fills iov_base -- confirm against qemu/iov.h.
+ */
+static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset,
+ int64_t bytes, const void *buf,
+ BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
+
+ assert(bytes <= SIZE_MAX);
+
+ return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
+}
+
+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
+ int64_t bytes);
+
+int coroutine_fn blk_co_flush(BlockBackend *blk);
+int blk_flush(BlockBackend *blk);
+
+int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
+
+int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
+ int64_t bytes);
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags);
+int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+
+#endif /* BLOCK_BACKEND_IO_H */
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index e5e1524f06..038be9fc40 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -13,272 +13,9 @@
#ifndef BLOCK_BACKEND_H
#define BLOCK_BACKEND_H
-#include "qemu/iov.h"
-#include "block/throttle-groups.h"
+#include "block-backend-global-state.h"
+#include "block-backend-io.h"
-/*
- * TODO Have to include block/block.h for a bunch of block layer
- * types. Unfortunately, this pulls in the whole BlockDriverState
- * API, which we don't want used by many BlockBackend users. Some of
- * the types belong here, and the rest should be split into a common
- * header and one for the BlockDriverState API.
- */
-#include "block/block.h"
-
-/* Callbacks for block device models */
-typedef struct BlockDevOps {
- /*
- * Runs when virtual media changed (monitor commands eject, change)
- * Argument load is true on load and false on eject.
- * Beware: doesn't run when a host device's physical media
- * changes. Sure would be useful if it did.
- * Device models with removable media must implement this callback.
- */
- void (*change_media_cb)(void *opaque, bool load, Error **errp);
- /*
- * Runs when an eject request is issued from the monitor, the tray
- * is closed, and the medium is locked.
- * Device models that do not implement is_medium_locked will not need
- * this callback. Device models that can lock the medium or tray might
- * want to implement the callback and unlock the tray when "force" is
- * true, even if they do not support eject requests.
- */
- void (*eject_request_cb)(void *opaque, bool force);
- /*
- * Is the virtual tray open?
- * Device models implement this only when the device has a tray.
- */
- bool (*is_tray_open)(void *opaque);
- /*
- * Is the virtual medium locked into the device?
- * Device models implement this only when device has such a lock.
- */
- bool (*is_medium_locked)(void *opaque);
- /*
- * Runs when the size changed (e.g. monitor command block_resize)
- */
- void (*resize_cb)(void *opaque);
- /*
- * Runs when the backend receives a drain request.
- */
- void (*drained_begin)(void *opaque);
- /*
- * Runs when the backend's last drain request ends.
- */
- void (*drained_end)(void *opaque);
- /*
- * Is the device still busy?
- */
- bool (*drained_poll)(void *opaque);
-} BlockDevOps;
-
-/* This struct is embedded in (the private) BlockBackend struct and contains
- * fields that must be public. This is in particular for QLIST_ENTRY() and
- * friends so that BlockBackends can be kept in lists outside block-backend.c
- * */
-typedef struct BlockBackendPublic {
- ThrottleGroupMember throttle_group_member;
-} BlockBackendPublic;
-
-BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
-BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
- uint64_t shared_perm, Error **errp);
-BlockBackend *blk_new_open(const char *filename, const char *reference,
- QDict *options, int flags, Error **errp);
-int blk_get_refcnt(BlockBackend *blk);
-void blk_ref(BlockBackend *blk);
-void blk_unref(BlockBackend *blk);
-void blk_remove_all_bs(void);
-const char *blk_name(const BlockBackend *blk);
-BlockBackend *blk_by_name(const char *name);
-BlockBackend *blk_next(BlockBackend *blk);
-BlockBackend *blk_all_next(BlockBackend *blk);
-bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);
-void monitor_remove_blk(BlockBackend *blk);
-
-BlockBackendPublic *blk_get_public(BlockBackend *blk);
-BlockBackend *blk_by_public(BlockBackendPublic *public);
-
-BlockDriverState *blk_bs(BlockBackend *blk);
-void blk_remove_bs(BlockBackend *blk);
-int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
-int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
-bool bdrv_has_blk(BlockDriverState *bs);
-bool bdrv_is_root_node(BlockDriverState *bs);
-int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
- Error **errp);
-void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
-
-void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
-void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
-void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
-void blk_iostatus_enable(BlockBackend *blk);
-bool blk_iostatus_is_enabled(const BlockBackend *blk);
-BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
-void blk_iostatus_disable(BlockBackend *blk);
-void blk_iostatus_reset(BlockBackend *blk);
-void blk_iostatus_set_err(BlockBackend *blk, int error);
-int blk_attach_dev(BlockBackend *blk, DeviceState *dev);
-void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
-DeviceState *blk_get_attached_dev(BlockBackend *blk);
-char *blk_get_attached_dev_id(BlockBackend *blk);
-BlockBackend *blk_by_dev(void *dev);
-BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
-void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
-int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
- int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags);
-int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-
-static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset,
- int64_t bytes, void *buf,
- BdrvRequestFlags flags)
-{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
- assert(bytes <= SIZE_MAX);
-
- return blk_co_preadv(blk, offset, bytes, &qiov, flags);
-}
-
-static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset,
- int64_t bytes, void *buf,
- BdrvRequestFlags flags)
-{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
- assert(bytes <= SIZE_MAX);
-
- return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
-}
-
-int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
-BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque);
-int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
-int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes);
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
- BdrvRequestFlags flags);
-int64_t blk_getlength(BlockBackend *blk);
-void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
-int64_t blk_nb_sectors(BlockBackend *blk);
-BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
- QEMUIOVector *qiov, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
- QEMUIOVector *qiov, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_flush(BlockBackend *blk,
- BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes,
- BlockCompletionFunc *cb, void *opaque);
-void blk_aio_cancel(BlockAIOCB *acb);
-void blk_aio_cancel_async(BlockAIOCB *acb);
-int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
-BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
- BlockCompletionFunc *cb, void *opaque);
-int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
- int64_t bytes);
-int coroutine_fn blk_co_flush(BlockBackend *blk);
-int blk_flush(BlockBackend *blk);
-int blk_commit_all(void);
-void blk_inc_in_flight(BlockBackend *blk);
-void blk_dec_in_flight(BlockBackend *blk);
-void blk_drain(BlockBackend *blk);
-void blk_drain_all(void);
-void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
- BlockdevOnError on_write_error);
-BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
-BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
- int error);
-void blk_error_action(BlockBackend *blk, BlockErrorAction action,
- bool is_read, int error);
-bool blk_supports_write_perm(BlockBackend *blk);
-bool blk_is_writable(BlockBackend *blk);
-bool blk_is_sg(BlockBackend *blk);
-bool blk_enable_write_cache(BlockBackend *blk);
-void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
-void blk_invalidate_cache(BlockBackend *blk, Error **errp);
-bool blk_is_inserted(BlockBackend *blk);
-bool blk_is_available(BlockBackend *blk);
-void blk_lock_medium(BlockBackend *blk, bool locked);
-void blk_eject(BlockBackend *blk, bool eject_flag);
-int blk_get_flags(BlockBackend *blk);
-uint32_t blk_get_request_alignment(BlockBackend *blk);
-uint32_t blk_get_max_transfer(BlockBackend *blk);
-uint64_t blk_get_max_hw_transfer(BlockBackend *blk);
-int blk_get_max_iov(BlockBackend *blk);
-int blk_get_max_hw_iov(BlockBackend *blk);
-void blk_set_guest_block_size(BlockBackend *blk, int align);
-void *blk_try_blockalign(BlockBackend *blk, size_t size);
-void *blk_blockalign(BlockBackend *blk, size_t size);
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
-void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
-void blk_op_block_all(BlockBackend *blk, Error *reason);
-void blk_op_unblock_all(BlockBackend *blk, Error *reason);
-AioContext *blk_get_aio_context(BlockBackend *blk);
-int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
- Error **errp);
-void blk_add_aio_context_notifier(BlockBackend *blk,
- void (*attached_aio_context)(AioContext *new_context, void *opaque),
- void (*detach_aio_context)(void *opaque), void *opaque);
-void blk_remove_aio_context_notifier(BlockBackend *blk,
- void (*attached_aio_context)(AioContext *,
- void *),
- void (*detach_aio_context)(void *),
- void *opaque);
-void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify);
-void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify);
-void blk_io_plug(BlockBackend *blk);
-void blk_io_unplug(BlockBackend *blk);
-BlockAcctStats *blk_get_stats(BlockBackend *blk);
-BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
-void blk_update_root_state(BlockBackend *blk);
-bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
-int blk_get_open_flags_from_root_state(BlockBackend *blk);
-
-void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
- BlockCompletionFunc *cb, void *opaque);
-int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
-int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
- int64_t bytes);
-int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
-int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
-int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
- int64_t pos, int size);
-int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size);
-int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz);
-int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo);
-BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
- BlockCompletionFunc *cb,
- void *opaque, int ret);
-
-void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg);
-void blk_io_limits_disable(BlockBackend *blk);
-void blk_io_limits_enable(BlockBackend *blk, const char *group);
-void blk_io_limits_update_group(BlockBackend *blk, const char *group);
-void blk_set_force_allow_inactivate(BlockBackend *blk);
-
-void blk_register_buf(BlockBackend *blk, void *host, size_t size);
-void blk_unregister_buf(BlockBackend *blk, void *host);
-
-int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
- BlockBackend *blk_out, int64_t off_out,
- int64_t bytes, BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-
-const BdrvChild *blk_root(BlockBackend *blk);
-
-int blk_make_empty(BlockBackend *blk, Error **errp);
+/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
#endif
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index f9fb54d437..3211b16513 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -13,9 +13,6 @@
#include "block/block.h"
#include "qemu/queue.h"
-void blockdev_mark_auto_del(BlockBackend *blk);
-void blockdev_auto_del(BlockBackend *blk);
-
typedef enum {
IF_DEFAULT = -1, /* for use with drive_add() only */
/*
@@ -38,6 +35,16 @@ struct DriveInfo {
QTAILQ_ENTRY(DriveInfo) next;
};
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+void blockdev_mark_auto_del(BlockBackend *blk);
+void blockdev_auto_del(BlockBackend *blk);
+
DriveInfo *blk_legacy_dinfo(BlockBackend *blk);
DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo);
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo);
diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h
index 247f0661d1..bf8f99a824 100644
--- a/include/sysemu/hax.h
+++ b/include/sysemu/hax.h
@@ -25,17 +25,23 @@
int hax_sync_vcpus(void);
#ifdef NEED_CPU_H
+# ifdef CONFIG_HAX
+# define CONFIG_HAX_IS_POSSIBLE
+# endif
+#else /* !NEED_CPU_H */
+# define CONFIG_HAX_IS_POSSIBLE
+#endif
-#ifdef CONFIG_HAX
+#ifdef CONFIG_HAX_IS_POSSIBLE
-int hax_enabled(void);
+extern bool hax_allowed;
-#else /* CONFIG_HAX */
+#define hax_enabled() (hax_allowed)
-#define hax_enabled() (0)
+#else /* !CONFIG_HAX_IS_POSSIBLE */
-#endif /* CONFIG_HAX */
+#define hax_enabled() (0)
-#endif /* NEED_CPU_H */
+#endif /* CONFIG_HAX_IS_POSSIBLE */
#endif /* QEMU_HAX_H */
diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h
index 01b5ebf442..22903a55f7 100644
--- a/include/sysemu/hw_accel.h
+++ b/include/sysemu/hw_accel.h
@@ -23,9 +23,4 @@ void cpu_synchronize_post_reset(CPUState *cpu);
void cpu_synchronize_post_init(CPUState *cpu);
void cpu_synchronize_pre_loadvm(CPUState *cpu);
-static inline bool cpu_check_are_resettable(void)
-{
- return kvm_enabled() ? kvm_cpu_check_are_resettable() : true;
-}
-
#endif /* QEMU_HW_ACCEL_H */
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 6eb39a088b..a5bec96fb0 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -249,6 +249,9 @@ int kvm_has_intx_set_mask(void);
bool kvm_arm_supports_user_irq(void);
+int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
+int kvm_on_sigbus(int code, void *addr);
+
#ifdef NEED_CPU_H
#include "cpu.h"
@@ -261,9 +264,6 @@ int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
void kvm_remove_all_breakpoints(CPUState *cpu);
int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap);
-int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
-int kvm_on_sigbus(int code, void *addr);
-
/* internal API */
int kvm_ioctl(KVMState *s, int type, ...);
diff --git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h
index 4b20f1a639..3bbeb1bcb4 100644
--- a/include/sysemu/memory_mapping.h
+++ b/include/sysemu/memory_mapping.h
@@ -15,8 +15,7 @@
#define MEMORY_MAPPING_H
#include "qemu/queue.h"
-#include "exec/cpu-defs.h"
-#include "exec/memory.h"
+#include "exec/cpu-common.h"
typedef struct GuestPhysBlock {
/* visible to guest, reflects PCI hole, etc */
@@ -43,7 +42,7 @@ typedef struct GuestPhysBlockList {
/* The physical and virtual address in the memory mapping are contiguous. */
typedef struct MemoryMapping {
hwaddr phys_addr;
- target_ulong virt_addr;
+ vaddr virt_addr;
ram_addr_t length;
QTAILQ_ENTRY(MemoryMapping) next;
} MemoryMapping;
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index 2edf33658a..dd64fb401d 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -55,6 +55,7 @@ int os_mlock(void);
typedef struct timeval qemu_timeval;
#define qemu_gettimeofday(tp) gettimeofday(tp, NULL)
+int os_set_daemonize(bool d);
bool is_daemonized(void);
/**
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index 43f569b5c2..770752222a 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -77,6 +77,14 @@ typedef struct {
} qemu_timeval;
int qemu_gettimeofday(qemu_timeval *tp);
+static inline int os_set_daemonize(bool d)
+{
+ if (d) {
+ return -ENOTSUP;
+ }
+ return 0;
+}
+
static inline bool is_daemonized(void)
{
return false;
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
index 675873e200..dd444734d9 100644
--- a/include/tcg/tcg-opc.h
+++ b/include/tcg/tcg-opc.h
@@ -245,6 +245,9 @@ DEF(or_vec, 1, 2, 0, IMPLVEC)
DEF(xor_vec, 1, 2, 0, IMPLVEC)
DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
+DEF(nand_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nand_vec))
+DEF(nor_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nor_vec))
+DEF(eqv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_eqv_vec))
DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 42f5b500ed..73869fd9d0 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -43,7 +43,7 @@
#else
#define MAX_OPC_PARAM_PER_ARG 1
#endif
-#define MAX_OPC_PARAM_IARGS 6
+#define MAX_OPC_PARAM_IARGS 7
#define MAX_OPC_PARAM_OARGS 1
#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
@@ -183,6 +183,9 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_not_vec 0
#define TCG_TARGET_HAS_andc_vec 0
#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 0
+#define TCG_TARGET_HAS_eqv_vec 0
#define TCG_TARGET_HAS_roti_vec 0
#define TCG_TARGET_HAS_rots_vec 0
#define TCG_TARGET_HAS_rotv_vec 0
diff --git a/job.c b/job.c
index 54db80df66..075c6f3a20 100644
--- a/job.c
+++ b/job.c
@@ -381,6 +381,8 @@ void job_ref(Job *job)
void job_unref(Job *job)
{
+ GLOBAL_STATE_CODE();
+
if (--job->refcnt == 0) {
assert(job->status == JOB_STATUS_NULL);
assert(!timer_pending(&job->sleep_timer));
@@ -602,6 +604,7 @@ bool job_user_paused(Job *job)
void job_user_resume(Job *job, Error **errp)
{
assert(job);
+ GLOBAL_STATE_CODE();
if (!job->user_paused || job->pause_count <= 0) {
error_setg(errp, "Can't resume a job that was not paused");
return;
@@ -672,6 +675,7 @@ static void job_update_rc(Job *job)
static void job_commit(Job *job)
{
assert(!job->ret);
+ GLOBAL_STATE_CODE();
if (job->driver->commit) {
job->driver->commit(job);
}
@@ -680,6 +684,7 @@ static void job_commit(Job *job)
static void job_abort(Job *job)
{
assert(job->ret);
+ GLOBAL_STATE_CODE();
if (job->driver->abort) {
job->driver->abort(job);
}
@@ -687,6 +692,7 @@ static void job_abort(Job *job)
static void job_clean(Job *job)
{
+ GLOBAL_STATE_CODE();
if (job->driver->clean) {
job->driver->clean(job);
}
@@ -726,6 +732,7 @@ static int job_finalize_single(Job *job)
static void job_cancel_async(Job *job, bool force)
{
+ GLOBAL_STATE_CODE();
if (job->driver->cancel) {
force = job->driver->cancel(job, force);
} else {
@@ -825,6 +832,7 @@ static void job_completed_txn_abort(Job *job)
static int job_prepare(Job *job)
{
+ GLOBAL_STATE_CODE();
if (job->ret == 0 && job->driver->prepare) {
job->ret = job->driver->prepare(job);
job_update_rc(job);
@@ -952,6 +960,7 @@ static void coroutine_fn job_co_entry(void *opaque)
Job *job = opaque;
assert(job && job->driver && job->driver->run);
+ assert(job->aio_context == qemu_get_current_aio_context());
job_pause_point(job);
job->ret = job->driver->run(job, &job->err);
job->deferred_to_main_loop = true;
@@ -1054,6 +1063,7 @@ void job_complete(Job *job, Error **errp)
{
/* Should not be reachable via external interface for internal jobs */
assert(job->id);
+ GLOBAL_STATE_CODE();
if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) {
return;
}
diff --git a/meson.build b/meson.build
index 28612fca36..2d6601467f 100644
--- a/meson.build
+++ b/meson.build
@@ -1462,14 +1462,16 @@ dbus_display = get_option('dbus_display') \
.allowed()
have_virtfs = get_option('virtfs') \
- .require(targetos == 'linux',
- error_message: 'virtio-9p (virtfs) requires Linux') \
- .require(libattr.found() and libcap_ng.found(),
- error_message: 'virtio-9p (virtfs) requires libcap-ng-devel and libattr-devel') \
+ .require(targetos == 'linux' or targetos == 'darwin',
+ error_message: 'virtio-9p (virtfs) requires Linux or macOS') \
+ .require(targetos == 'linux' or cc.has_function('pthread_fchdir_np'),
+ error_message: 'virtio-9p (virtfs) on macOS requires the presence of pthread_fchdir_np') \
+ .require(targetos == 'darwin' or (libattr.found() and libcap_ng.found()),
+ error_message: 'virtio-9p (virtfs) on Linux requires libcap-ng-devel and libattr-devel') \
.disable_auto_if(not have_tools and not have_system) \
.allowed()
-have_virtfs_proxy_helper = have_virtfs and have_tools
+have_virtfs_proxy_helper = targetos != 'darwin' and have_virtfs and have_tools
foreach k : get_option('trace_backends')
config_host_data.set('CONFIG_TRACE_' + k.to_upper(), true)
@@ -1619,9 +1621,15 @@ config_host_data.set('CONFIG_CLOCK_ADJTIME', cc.has_function('clock_adjtime'))
config_host_data.set('CONFIG_DUP3', cc.has_function('dup3'))
config_host_data.set('CONFIG_FALLOCATE', cc.has_function('fallocate'))
config_host_data.set('CONFIG_POSIX_FALLOCATE', cc.has_function('posix_fallocate'))
-config_host_data.set('CONFIG_POSIX_MEMALIGN', cc.has_function('posix_memalign'))
+# Note that we need to specify prefix: here to avoid incorrectly
+# thinking that Windows has posix_memalign()
+config_host_data.set('CONFIG_POSIX_MEMALIGN', cc.has_function('posix_memalign', prefix: '#include <stdlib.h>'))
+config_host_data.set('CONFIG_ALIGNED_MALLOC', cc.has_function('_aligned_malloc'))
+config_host_data.set('CONFIG_VALLOC', cc.has_function('valloc'))
+config_host_data.set('CONFIG_MEMALIGN', cc.has_function('memalign'))
config_host_data.set('CONFIG_PPOLL', cc.has_function('ppoll'))
config_host_data.set('CONFIG_PREADV', cc.has_function('preadv', prefix: '#include <sys/uio.h>'))
+config_host_data.set('CONFIG_PTHREAD_FCHDIR_NP', cc.has_function('pthread_fchdir_np'))
config_host_data.set('CONFIG_SEM_TIMEDWAIT', cc.has_function('sem_timedwait', dependencies: threads))
config_host_data.set('CONFIG_SENDFILE', cc.has_function('sendfile'))
config_host_data.set('CONFIG_SETNS', cc.has_function('setns') and cc.has_function('unshare'))
@@ -2432,8 +2440,8 @@ if get_option('cfi') and slirp_opt == 'system'
endif
fdt = not_found
-fdt_opt = get_option('fdt')
if have_system
+ fdt_opt = get_option('fdt')
if fdt_opt in ['enabled', 'auto', 'system']
have_internal = fs.exists(meson.current_source_dir() / 'dtc/libfdt/Makefile.libfdt')
fdt = cc.find_library('fdt', kwargs: static_kwargs,
@@ -2476,6 +2484,8 @@ if have_system
fdt = declare_dependency(link_with: libfdt,
include_directories: fdt_inc)
endif
+else
+ fdt_opt = 'disabled'
endif
if not fdt.found() and fdt_required.length() > 0
error('fdt not available but required by targets ' + ', '.join(fdt_required))
@@ -2705,6 +2715,7 @@ if have_system or have_user
'target/i386',
'target/i386/kvm',
'target/mips/tcg',
+ 'target/nios2',
'target/ppc',
'target/riscv',
'target/s390x',
diff --git a/migration/block.c b/migration/block.c
index a950977855..077a413325 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -932,7 +932,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
return -EINVAL;
}
- blk_invalidate_cache(blk, &local_err);
+ blk_activate(blk, &local_err);
if (local_err) {
error_report_err(local_err);
return -EINVAL;
diff --git a/migration/migration.c b/migration/migration.c
index 9cc344514b..695f0f2900 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -503,9 +503,9 @@ static void process_incoming_migration_bh(void *opaque)
if (!migrate_late_block_activate() ||
(autostart && (!global_state_received() ||
global_state_get_runstate() == RUN_STATE_RUNNING))) {
- /* Make sure all file formats flush their mutable metadata.
+ /* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet. */
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
local_err = NULL;
@@ -591,8 +591,8 @@ static void process_incoming_migration_co(void *opaque)
/* we get COLO info, and know if we are in COLO mode */
if (!ret && migration_incoming_colo_enabled()) {
- /* Make sure all file formats flush their mutable metadata */
- bdrv_invalidate_cache_all(&local_err);
+ /* Make sure all file formats throw away their mutable metadata */
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
goto fail;
@@ -1932,7 +1932,7 @@ static void migrate_fd_cancel(MigrationState *s)
if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
Error *local_err = NULL;
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
} else {
@@ -3111,7 +3111,7 @@ fail:
*/
Error *local_err = NULL;
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
}
@@ -3256,7 +3256,7 @@ fail_invalidate:
Error *local_err = NULL;
qemu_mutex_lock_iothread();
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
} else {
diff --git a/migration/savevm.c b/migration/savevm.c
index 967ff80547..02ed94c180 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1438,7 +1438,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
if (inactivate_disks) {
/* Inactivate before sending QEMU_VM_EOF so that the
- * bdrv_invalidate_cache_all() on the other end won't fail. */
+ * bdrv_activate_all() on the other end won't fail. */
ret = bdrv_inactivate_all();
if (ret) {
error_report("%s: bdrv_inactivate_all() failed (%d)",
@@ -2013,9 +2013,9 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
trace_loadvm_postcopy_handle_run_bh("after announce");
- /* Make sure all file formats flush their mutable metadata.
+ /* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet. */
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
local_err = NULL;
@@ -2808,6 +2808,8 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
g_autoptr(GDateTime) now = g_date_time_new_now_local();
AioContext *aio_context;
+ GLOBAL_STATE_CODE();
+
if (migration_is_blocked(errp)) {
return false;
}
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index df97582dd4..ad82c275c4 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -144,7 +144,7 @@ void qmp_cont(Error **errp)
* If there are no inactive block nodes (e.g. because the VM was just
* paused rather than completing a migration), bdrv_inactivate_all() simply
* doesn't do anything. */
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
diff --git a/nbd/server.c b/nbd/server.c
index 9fb2f26402..53e68cf027 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -25,6 +25,7 @@
#include "trace.h"
#include "nbd-internal.h"
#include "qemu/units.h"
+#include "qemu/memalign.h"
#define NBD_META_ID_BASE_ALLOCATION 0
#define NBD_META_ID_ALLOCATION_DEPTH 1
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
index e4d4218db6..b8faa8796c 100644
--- a/net/l2tpv3.c
+++ b/net/l2tpv3.c
@@ -34,7 +34,7 @@
#include "qemu/sockets.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
-
+#include "qemu/memalign.h"
/* The buffer size needs to be investigated for optimum numbers and
* optimum means of paging in on different systems. This size is
diff --git a/os-posix.c b/os-posix.c
index ae6c9f2a5e..24692c8593 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -317,6 +317,12 @@ bool is_daemonized(void)
return daemonize;
}
+int os_set_daemonize(bool d)
+{
+ daemonize = d;
+ return 0;
+}
+
int os_mlock(void)
{
#ifdef HAVE_MLOCKALL
diff --git a/pc-bios/bios-256k.bin b/pc-bios/bios-256k.bin
index e0796344df..6163fb8149 100644
--- a/pc-bios/bios-256k.bin
+++ b/pc-bios/bios-256k.bin
Binary files differ
diff --git a/pc-bios/bios-microvm.bin b/pc-bios/bios-microvm.bin
index f0215521b0..97fbd3192a 100644
--- a/pc-bios/bios-microvm.bin
+++ b/pc-bios/bios-microvm.bin
Binary files differ
diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin
index bcf8b484c9..68f65ff2fd 100644
--- a/pc-bios/bios.bin
+++ b/pc-bios/bios.bin
Binary files differ
diff --git a/pc-bios/vgabios-ati.bin b/pc-bios/vgabios-ati.bin
index 7171a56f9d..4533d0d063 100644
--- a/pc-bios/vgabios-ati.bin
+++ b/pc-bios/vgabios-ati.bin
Binary files differ
diff --git a/pc-bios/vgabios-bochs-display.bin b/pc-bios/vgabios-bochs-display.bin
index afea4c930d..3ecf92de01 100644
--- a/pc-bios/vgabios-bochs-display.bin
+++ b/pc-bios/vgabios-bochs-display.bin
Binary files differ
diff --git a/pc-bios/vgabios-cirrus.bin b/pc-bios/vgabios-cirrus.bin
index 194c8139a7..9b4ffdf45f 100644
--- a/pc-bios/vgabios-cirrus.bin
+++ b/pc-bios/vgabios-cirrus.bin
Binary files differ
diff --git a/pc-bios/vgabios-qxl.bin b/pc-bios/vgabios-qxl.bin
index 056b6657b3..8a27dac557 100644
--- a/pc-bios/vgabios-qxl.bin
+++ b/pc-bios/vgabios-qxl.bin
Binary files differ
diff --git a/pc-bios/vgabios-ramfb.bin b/pc-bios/vgabios-ramfb.bin
index 02662006f2..ec9541cfb4 100644
--- a/pc-bios/vgabios-ramfb.bin
+++ b/pc-bios/vgabios-ramfb.bin
Binary files differ
diff --git a/pc-bios/vgabios-stdvga.bin b/pc-bios/vgabios-stdvga.bin
index cf81ce2876..55390c45c9 100644
--- a/pc-bios/vgabios-stdvga.bin
+++ b/pc-bios/vgabios-stdvga.bin
Binary files differ
diff --git a/pc-bios/vgabios-virtio.bin b/pc-bios/vgabios-virtio.bin
index f4178f70de..2334733a75 100644
--- a/pc-bios/vgabios-virtio.bin
+++ b/pc-bios/vgabios-virtio.bin
Binary files differ
diff --git a/pc-bios/vgabios-vmware.bin b/pc-bios/vgabios-vmware.bin
index 8fae88af28..b668ac04a6 100644
--- a/pc-bios/vgabios-vmware.bin
+++ b/pc-bios/vgabios-vmware.bin
Binary files differ
diff --git a/pc-bios/vgabios.bin b/pc-bios/vgabios.bin
index e5f45f0c9e..a924891ea5 100644
--- a/pc-bios/vgabios.bin
+++ b/pc-bios/vgabios.bin
Binary files differ
diff --git a/plugins/loader.c b/plugins/loader.c
index 4883b0a1cb..88c30bde2d 100644
--- a/plugins/loader.c
+++ b/plugins/loader.c
@@ -27,6 +27,7 @@
#include "qemu/cacheinfo.h"
#include "qemu/xxhash.h"
#include "qemu/plugin.h"
+#include "qemu/memalign.h"
#include "hw/core/cpu.h"
#include "exec/exec-all.h"
#ifndef CONFIG_USER_ONLY
diff --git a/python/qemu/aqmp/legacy.py b/python/qemu/aqmp/legacy.py
index 6baa5f3409..46026e9fdc 100644
--- a/python/qemu/aqmp/legacy.py
+++ b/python/qemu/aqmp/legacy.py
@@ -57,7 +57,7 @@ class QEMUMonitorProtocol(qemu.qmp.QEMUMonitorProtocol):
self._timeout: Optional[float] = None
if server:
- self._aqmp._bind_hack(address) # pylint: disable=protected-access
+ self._sync(self._aqmp.start_server(self._address))
_T = TypeVar('_T')
@@ -90,10 +90,7 @@ class QEMUMonitorProtocol(qemu.qmp.QEMUMonitorProtocol):
self._aqmp.await_greeting = True
self._aqmp.negotiate = True
- self._sync(
- self._aqmp.accept(self._address),
- timeout
- )
+ self._sync(self._aqmp.accept(), timeout)
ret = self._get_greeting()
assert ret is not None
diff --git a/python/qemu/aqmp/protocol.py b/python/qemu/aqmp/protocol.py
index 33358f5cd7..36fae57f27 100644
--- a/python/qemu/aqmp/protocol.py
+++ b/python/qemu/aqmp/protocol.py
@@ -10,12 +10,14 @@ In this package, it is used as the implementation for the `QMPClient`
class.
"""
+# It's all the docstrings ... ! It's long for a good reason ^_^;
+# pylint: disable=too-many-lines
+
import asyncio
from asyncio import StreamReader, StreamWriter
from enum import Enum
from functools import wraps
import logging
-import socket
from ssl import SSLContext
from typing import (
Any,
@@ -239,8 +241,9 @@ class AsyncProtocol(Generic[T]):
self._runstate = Runstate.IDLE
self._runstate_changed: Optional[asyncio.Event] = None
- # Workaround for bind()
- self._sock: Optional[socket.socket] = None
+ # Server state for start_server() and _incoming()
+ self._server: Optional[asyncio.AbstractServer] = None
+ self._accepted: Optional[asyncio.Event] = None
def __repr__(self) -> str:
cls_name = type(self).__name__
@@ -265,21 +268,90 @@ class AsyncProtocol(Generic[T]):
@upper_half
@require(Runstate.IDLE)
- async def accept(self, address: SocketAddrT,
- ssl: Optional[SSLContext] = None) -> None:
+ async def start_server_and_accept(
+ self, address: SocketAddrT,
+ ssl: Optional[SSLContext] = None
+ ) -> None:
"""
Accept a connection and begin processing message queues.
If this call fails, `runstate` is guaranteed to be set back to `IDLE`.
+ This method is precisely equivalent to calling `start_server()`
+ followed by `accept()`.
+
+ :param address:
+ Address to listen on; UNIX socket path or TCP address/port.
+ :param ssl: SSL context to use, if any.
+
+ :raise StateError: When the `Runstate` is not `IDLE`.
+ :raise ConnectError:
+ When a connection or session cannot be established.
+
+ This exception will wrap a more concrete one. In most cases,
+ the wrapped exception will be `OSError` or `EOFError`. If a
+ protocol-level failure occurs while establishing a new
+ session, the wrapped error may also be a `QMPError`.
+ """
+ await self.start_server(address, ssl)
+ await self.accept()
+ assert self.runstate == Runstate.RUNNING
+
+ @upper_half
+ @require(Runstate.IDLE)
+ async def start_server(self, address: SocketAddrT,
+ ssl: Optional[SSLContext] = None) -> None:
+ """
+ Start listening for an incoming connection, but do not wait for a peer.
+
+ This method starts listening for an incoming connection, but
+ does not block waiting for a peer. This call will return
+ immediately after binding and listening on a socket. A later
+ call to `accept()` must be made in order to finalize the
+ incoming connection.
:param address:
- Address to listen to; UNIX socket path or TCP address/port.
+ Address to listen on; UNIX socket path or TCP address/port.
:param ssl: SSL context to use, if any.
:raise StateError: When the `Runstate` is not `IDLE`.
- :raise ConnectError: If a connection could not be accepted.
+ :raise ConnectError:
+ When the server could not start listening on this address.
+
+ This exception will wrap a more concrete one. In most cases,
+ the wrapped exception will be `OSError`.
+ """
+ await self._session_guard(
+ self._do_start_server(address, ssl),
+ 'Failed to establish connection')
+ assert self.runstate == Runstate.CONNECTING
+
+ @upper_half
+ @require(Runstate.CONNECTING)
+ async def accept(self) -> None:
+ """
+ Accept an incoming connection and begin processing message queues.
+
+ If this call fails, `runstate` is guaranteed to be set back to `IDLE`.
+
+ :raise StateError: When the `Runstate` is not `CONNECTING`.
+ :raise QMPError: When `start_server()` was not called yet.
+ :raise ConnectError:
+ When a connection or session cannot be established.
+
+ This exception will wrap a more concrete one. In most cases,
+ the wrapped exception will be `OSError` or `EOFError`. If a
+ protocol-level failure occurs while establishing a new
+ session, the wrapped error may also be a `QMPError`.
"""
- await self._new_session(address, ssl, accept=True)
+ if self._accepted is None:
+ raise QMPError("Cannot call accept() before start_server().")
+ await self._session_guard(
+ self._do_accept(),
+ 'Failed to establish connection')
+ await self._session_guard(
+ self._establish_session(),
+ 'Failed to establish session')
+ assert self.runstate == Runstate.RUNNING
@upper_half
@require(Runstate.IDLE)
@@ -295,9 +367,21 @@ class AsyncProtocol(Generic[T]):
:param ssl: SSL context to use, if any.
:raise StateError: When the `Runstate` is not `IDLE`.
- :raise ConnectError: If a connection cannot be made to the server.
+ :raise ConnectError:
+ When a connection or session cannot be established.
+
+ This exception will wrap a more concrete one. In most cases,
+ the wrapped exception will be `OSError` or `EOFError`. If a
+ protocol-level failure occurs while establishing a new
+ session, the wrapped error may also be a `QMPError`.
"""
- await self._new_session(address, ssl)
+ await self._session_guard(
+ self._do_connect(address, ssl),
+ 'Failed to establish connection')
+ await self._session_guard(
+ self._establish_session(),
+ 'Failed to establish session')
+ assert self.runstate == Runstate.RUNNING
@upper_half
async def disconnect(self) -> None:
@@ -317,153 +401,146 @@ class AsyncProtocol(Generic[T]):
# Section: Session machinery
# --------------------------
- @property
- def _runstate_event(self) -> asyncio.Event:
- # asyncio.Event() objects should not be created prior to entrance into
- # an event loop, so we can ensure we create it in the correct context.
- # Create it on-demand *only* at the behest of an 'async def' method.
- if not self._runstate_changed:
- self._runstate_changed = asyncio.Event()
- return self._runstate_changed
-
- @upper_half
- @bottom_half
- def _set_state(self, state: Runstate) -> None:
- """
- Change the `Runstate` of the protocol connection.
-
- Signals the `runstate_changed` event.
- """
- if state == self._runstate:
- return
-
- self.logger.debug("Transitioning from '%s' to '%s'.",
- str(self._runstate), str(state))
- self._runstate = state
- self._runstate_event.set()
- self._runstate_event.clear()
-
- @upper_half
- async def _new_session(self,
- address: SocketAddrT,
- ssl: Optional[SSLContext] = None,
- accept: bool = False) -> None:
+ async def _session_guard(self, coro: Awaitable[None], emsg: str) -> None:
"""
- Establish a new connection and initialize the session.
+ Async guard function used to roll back to `IDLE` on any error.
- Connect or accept a new connection, then begin the protocol
- session machinery. If this call fails, `runstate` is guaranteed
- to be set back to `IDLE`.
+ On any Exception, the state machine will be reset back to
+ `IDLE`. Most Exceptions will be wrapped with `ConnectError`, but
+ `BaseException` events will be left alone (This includes
+ asyncio.CancelledError, even prior to Python 3.8).
- :param address:
- Address to connect to/listen on;
- UNIX socket path or TCP address/port.
- :param ssl: SSL context to use, if any.
- :param accept: Accept a connection instead of connecting when `True`.
+ :param emsg:
+ Human-readable string describing what connection phase failed.
+ :raise BaseException:
+ When `BaseException` occurs in the guarded block.
:raise ConnectError:
- When a connection or session cannot be established.
-
- This exception will wrap a more concrete one. In most cases,
- the wrapped exception will be `OSError` or `EOFError`. If a
- protocol-level failure occurs while establishing a new
- session, the wrapped error may also be an `QMPError`.
+ When any other error is encountered in the guarded block.
"""
- assert self.runstate == Runstate.IDLE
-
+ # Note: After Python 3.6 support is removed, this should be an
+ # @asynccontextmanager instead of accepting a callback.
try:
- phase = "connection"
- await self._establish_connection(address, ssl, accept)
-
- phase = "session"
- await self._establish_session()
-
+ await coro
except BaseException as err:
- emsg = f"Failed to establish {phase}"
self.logger.error("%s: %s", emsg, exception_summary(err))
self.logger.debug("%s:\n%s\n", emsg, pretty_traceback())
try:
- # Reset from CONNECTING back to IDLE.
+ # Reset the runstate back to IDLE.
await self.disconnect()
except:
- emsg = "Unexpected bottom half exception"
+ # We don't expect any Exceptions from the disconnect function
+ # here, because we failed to connect in the first place.
+ # The disconnect() function is intended to perform
+ # only cannot-fail cleanup here, but you never know.
+ emsg = (
+ "Unexpected bottom half exception. "
+ "This is a bug in the QMP library. "
+ "Please report it to <qemu-devel@nongnu.org> and "
+ "CC: John Snow <jsnow@redhat.com>."
+ )
self.logger.critical("%s:\n%s\n", emsg, pretty_traceback())
raise
+ # CancelledError is an Exception with special semantic meaning;
+ # We do NOT want to wrap it up under ConnectError.
# NB: CancelledError is not a BaseException before Python 3.8
if isinstance(err, asyncio.CancelledError):
raise
+ # Any other kind of error can be treated as some kind of connection
+ # failure broadly. Inspect the 'exc' field to explore the root
+ # cause in greater detail.
if isinstance(err, Exception):
raise ConnectError(emsg, err) from err
# Raise BaseExceptions un-wrapped, they're more important.
raise
- assert self.runstate == Runstate.RUNNING
+ @property
+ def _runstate_event(self) -> asyncio.Event:
+ # asyncio.Event() objects should not be created prior to entrance into
+ # an event loop, so we can ensure we create it in the correct context.
+ # Create it on-demand *only* at the behest of an 'async def' method.
+ if not self._runstate_changed:
+ self._runstate_changed = asyncio.Event()
+ return self._runstate_changed
@upper_half
- async def _establish_connection(
- self,
- address: SocketAddrT,
- ssl: Optional[SSLContext] = None,
- accept: bool = False
- ) -> None:
+ @bottom_half
+ def _set_state(self, state: Runstate) -> None:
"""
- Establish a new connection.
+ Change the `Runstate` of the protocol connection.
- :param address:
- Address to connect to/listen on;
- UNIX socket path or TCP address/port.
- :param ssl: SSL context to use, if any.
- :param accept: Accept a connection instead of connecting when `True`.
+ Signals the `runstate_changed` event.
"""
- assert self.runstate == Runstate.IDLE
- self._set_state(Runstate.CONNECTING)
-
- # Allow runstate watchers to witness 'CONNECTING' state; some
- # failures in the streaming layer are synchronous and will not
- # otherwise yield.
- await asyncio.sleep(0)
+ if state == self._runstate:
+ return
- if accept:
- await self._do_accept(address, ssl)
- else:
- await self._do_connect(address, ssl)
+ self.logger.debug("Transitioning from '%s' to '%s'.",
+ str(self._runstate), str(state))
+ self._runstate = state
+ self._runstate_event.set()
+ self._runstate_event.clear()
- def _bind_hack(self, address: Union[str, Tuple[str, int]]) -> None:
+ @bottom_half
+ async def _stop_server(self) -> None:
+ """
+ Stop listening for / accepting new incoming connections.
"""
- Used to create a socket in advance of accept().
+ if self._server is None:
+ return
- This is a workaround to ensure that we can guarantee timing of
- precisely when a socket exists to avoid a connection attempt
- bouncing off of nothing.
+ try:
+ self.logger.debug("Stopping server.")
+ self._server.close()
+ await self._server.wait_closed()
+ self.logger.debug("Server stopped.")
+ finally:
+ self._server = None
- Python 3.7+ adds a feature to separate the server creation and
- listening phases instead, and should be used instead of this
- hack.
+ @bottom_half # However, it does not run from the R/W tasks.
+ async def _incoming(self,
+ reader: asyncio.StreamReader,
+ writer: asyncio.StreamWriter) -> None:
"""
- if isinstance(address, tuple):
- family = socket.AF_INET
- else:
- family = socket.AF_UNIX
+ Accept an incoming connection and signal the upper_half.
- sock = socket.socket(family, socket.SOCK_STREAM)
- sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ This method does the minimum necessary to accept a single
+ incoming connection. It signals back to the upper_half ASAP so
+ that any errors during session initialization can occur
+ naturally in the caller's stack.
- try:
- sock.bind(address)
- except:
- sock.close()
- raise
+ :param reader: Incoming `asyncio.StreamReader`
+ :param writer: Incoming `asyncio.StreamWriter`
+ """
+ peer = writer.get_extra_info('peername', 'Unknown peer')
+ self.logger.debug("Incoming connection from %s", peer)
+
+ if self._reader or self._writer:
+ # Sadly, we can have more than one pending connection
+ # because of https://bugs.python.org/issue46715
+ # Close any extra connections we don't actually want.
+ self.logger.warning("Extraneous connection inadvertently accepted")
+ writer.close()
+ return
- self._sock = sock
+ # A connection has been accepted; stop listening for new ones.
+ assert self._accepted is not None
+ await self._stop_server()
+ self._reader, self._writer = (reader, writer)
+ self._accepted.set()
@upper_half
- async def _do_accept(self, address: SocketAddrT,
- ssl: Optional[SSLContext] = None) -> None:
+ async def _do_start_server(self, address: SocketAddrT,
+ ssl: Optional[SSLContext] = None) -> None:
"""
- Acting as the transport server, accept a single connection.
+ Start listening for an incoming connection, but do not wait for a peer.
+
+ This method starts listening for an incoming connection, but does not
+ block waiting for a peer. This call will return immediately after
+ binding and listening to a socket. A later call to accept() must be
+ made in order to finalize the incoming connection.
:param address:
Address to listen on; UNIX socket path or TCP address/port.
@@ -471,52 +548,54 @@ class AsyncProtocol(Generic[T]):
:raise OSError: For stream-related errors.
"""
- self.logger.debug("Awaiting connection on %s ...", address)
- connected = asyncio.Event()
- server: Optional[asyncio.AbstractServer] = None
-
- async def _client_connected_cb(reader: asyncio.StreamReader,
- writer: asyncio.StreamWriter) -> None:
- """Used to accept a single incoming connection, see below."""
- nonlocal server
- nonlocal connected
-
- # A connection has been accepted; stop listening for new ones.
- assert server is not None
- server.close()
- await server.wait_closed()
- server = None
-
- # Register this client as being connected
- self._reader, self._writer = (reader, writer)
+ assert self.runstate == Runstate.IDLE
+ self._set_state(Runstate.CONNECTING)
- # Signal back: We've accepted a client!
- connected.set()
+ self.logger.debug("Awaiting connection on %s ...", address)
+ self._accepted = asyncio.Event()
if isinstance(address, tuple):
coro = asyncio.start_server(
- _client_connected_cb,
- host=None if self._sock else address[0],
- port=None if self._sock else address[1],
+ self._incoming,
+ host=address[0],
+ port=address[1],
ssl=ssl,
backlog=1,
limit=self._limit,
- sock=self._sock,
)
else:
coro = asyncio.start_unix_server(
- _client_connected_cb,
- path=None if self._sock else address,
+ self._incoming,
+ path=address,
ssl=ssl,
backlog=1,
limit=self._limit,
- sock=self._sock,
)
- server = await coro # Starts listening
- await connected.wait() # Waits for the callback to fire (and finish)
- assert server is None
- self._sock = None
+ # Allow runstate watchers to witness 'CONNECTING' state; some
+ # failures in the streaming layer are synchronous and will not
+ # otherwise yield.
+ await asyncio.sleep(0)
+
+ # This will start the server (bind(2), listen(2)). It will also
+ # call accept(2) if we yield, but we don't block on that here.
+ self._server = await coro
+ self.logger.debug("Server listening on %s", address)
+
+ @upper_half
+ async def _do_accept(self) -> None:
+ """
+ Wait for and accept an incoming connection.
+
+ Requires that we have not yet accepted an incoming connection
+ from the upper_half, but it's OK if the server is no longer
+ running because the bottom_half has already accepted the
+ connection.
+ """
+ assert self._accepted is not None
+ await self._accepted.wait()
+ assert self._server is None
+ self._accepted = None
self.logger.debug("Connection accepted.")
@@ -532,6 +611,14 @@ class AsyncProtocol(Generic[T]):
:raise OSError: For stream-related errors.
"""
+ assert self.runstate == Runstate.IDLE
+ self._set_state(Runstate.CONNECTING)
+
+ # Allow runstate watchers to witness 'CONNECTING' state; some
+ # failures in the streaming layer are synchronous and will not
+ # otherwise yield.
+ await asyncio.sleep(0)
+
self.logger.debug("Connecting to %s ...", address)
if isinstance(address, tuple):
@@ -644,6 +731,7 @@ class AsyncProtocol(Generic[T]):
self._reader = None
self._writer = None
+ self._accepted = None
# NB: _runstate_changed cannot be cleared because we still need it to
# send the final runstate changed event ...!
@@ -667,6 +755,9 @@ class AsyncProtocol(Generic[T]):
def _done(task: Optional['asyncio.Future[Any]']) -> bool:
return task is not None and task.done()
+ # If the server is running, stop it.
+ await self._stop_server()
+
# Are we already in an error pathway? If either of the tasks are
# already done, or if we have no tasks but a reader/writer; we
# must be.
diff --git a/python/tests/protocol.py b/python/tests/protocol.py
index 5cd7938be3..d6849ad306 100644
--- a/python/tests/protocol.py
+++ b/python/tests/protocol.py
@@ -41,12 +41,25 @@ class NullProtocol(AsyncProtocol[None]):
self.trigger_input = asyncio.Event()
await super()._establish_session()
- async def _do_accept(self, address, ssl=None):
- if not self.fake_session:
- await super()._do_accept(address, ssl)
+ async def _do_start_server(self, address, ssl=None):
+ if self.fake_session:
+ self._accepted = asyncio.Event()
+ self._set_state(Runstate.CONNECTING)
+ await asyncio.sleep(0)
+ else:
+ await super()._do_start_server(address, ssl)
+
+ async def _do_accept(self):
+ if self.fake_session:
+ self._accepted = None
+ else:
+ await super()._do_accept()
async def _do_connect(self, address, ssl=None):
- if not self.fake_session:
+ if self.fake_session:
+ self._set_state(Runstate.CONNECTING)
+ await asyncio.sleep(0)
+ else:
await super()._do_connect(address, ssl)
async def _do_recv(self) -> None:
@@ -413,14 +426,14 @@ class Accept(Connect):
assert family in ('INET', 'UNIX')
if family == 'INET':
- await self.proto.accept(('example.com', 1))
+ await self.proto.start_server_and_accept(('example.com', 1))
elif family == 'UNIX':
- await self.proto.accept('/dev/null')
+ await self.proto.start_server_and_accept('/dev/null')
async def _hanging_connection(self):
with TemporaryDirectory(suffix='.aqmp') as tmpdir:
sock = os.path.join(tmpdir, type(self.proto).__name__ + ".sock")
- await self.proto.accept(sock)
+ await self.proto.start_server_and_accept(sock)
class FakeSession(TestBase):
@@ -449,13 +462,13 @@ class FakeSession(TestBase):
@TestBase.async_test
async def testFakeAccept(self):
"""Test the full state lifecycle (via accept) with a no-op session."""
- await self.proto.accept('/not/a/real/path')
+ await self.proto.start_server_and_accept('/not/a/real/path')
self.assertEqual(self.proto.runstate, Runstate.RUNNING)
@TestBase.async_test
async def testFakeRecv(self):
"""Test receiving a fake/null message."""
- await self.proto.accept('/not/a/real/path')
+ await self.proto.start_server_and_accept('/not/a/real/path')
logname = self.proto.logger.name
with self.assertLogs(logname, level='DEBUG') as context:
@@ -471,7 +484,7 @@ class FakeSession(TestBase):
@TestBase.async_test
async def testFakeSend(self):
"""Test sending a fake/null message."""
- await self.proto.accept('/not/a/real/path')
+ await self.proto.start_server_and_accept('/not/a/real/path')
logname = self.proto.logger.name
with self.assertLogs(logname, level='DEBUG') as context:
@@ -493,7 +506,7 @@ class FakeSession(TestBase):
):
with self.assertRaises(StateError) as context:
if accept:
- await self.proto.accept('/not/a/real/path')
+ await self.proto.start_server_and_accept('/not/a/real/path')
else:
await self.proto.connect('/not/a/real/path')
@@ -504,7 +517,7 @@ class FakeSession(TestBase):
@TestBase.async_test
async def testAcceptRequireRunning(self):
"""Test that accept() cannot be called when Runstate=RUNNING"""
- await self.proto.accept('/not/a/real/path')
+ await self.proto.start_server_and_accept('/not/a/real/path')
await self._prod_session_api(
Runstate.RUNNING,
@@ -515,7 +528,7 @@ class FakeSession(TestBase):
@TestBase.async_test
async def testConnectRequireRunning(self):
"""Test that connect() cannot be called when Runstate=RUNNING"""
- await self.proto.accept('/not/a/real/path')
+ await self.proto.start_server_and_accept('/not/a/real/path')
await self._prod_session_api(
Runstate.RUNNING,
@@ -526,7 +539,7 @@ class FakeSession(TestBase):
@TestBase.async_test
async def testAcceptRequireDisconnecting(self):
"""Test that accept() cannot be called when Runstate=DISCONNECTING"""
- await self.proto.accept('/not/a/real/path')
+ await self.proto.start_server_and_accept('/not/a/real/path')
# Cheat: force a disconnect.
await self.proto.simulate_disconnect()
@@ -541,7 +554,7 @@ class FakeSession(TestBase):
@TestBase.async_test
async def testConnectRequireDisconnecting(self):
"""Test that connect() cannot be called when Runstate=DISCONNECTING"""
- await self.proto.accept('/not/a/real/path')
+ await self.proto.start_server_and_accept('/not/a/real/path')
# Cheat: force a disconnect.
await self.proto.simulate_disconnect()
@@ -576,7 +589,7 @@ class SimpleSession(TestBase):
async def testSmoke(self):
with TemporaryDirectory(suffix='.aqmp') as tmpdir:
sock = os.path.join(tmpdir, type(self.proto).__name__ + ".sock")
- server_task = create_task(self.server.accept(sock))
+ server_task = create_task(self.server.start_server_and_accept(sock))
# give the server a chance to start listening [...]
await asyncio.sleep(0)
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 9a5a3641d0..f13b5ff942 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2914,13 +2914,14 @@
# @blkreplay: Since 4.2
# @compress: Since 5.0
# @copy-before-write: Since 6.2
+# @snapshot-access: Since 7.0
#
# Since: 2.9
##
{ 'enum': 'BlockdevDriver',
'data': [ 'blkdebug', 'blklogwrites', 'blkreplay', 'blkverify', 'bochs',
'cloop', 'compress', 'copy-before-write', 'copy-on-read', 'dmg',
- 'file', 'ftp', 'ftps', 'gluster',
+ 'file', 'snapshot-access', 'ftp', 'ftps', 'gluster',
{'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
{'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
'http', 'https', 'iscsi',
@@ -4171,11 +4172,19 @@
#
# @target: The target for copy-before-write operations.
#
+# @bitmap: If specified, the copy-before-write filter performs
+# copy-before-write operations only for dirty regions of the
+# bitmap. The bitmap size must equal the length of the filter's
+# file and target children. Note also that the bitmap is used
+# only to initialize the internal bitmap of the process, so
+# later modification (or removal) of the specified bitmap does
+# not influence the filter. (Since 7.0)
+#
# Since: 6.2
##
{ 'struct': 'BlockdevOptionsCbw',
'base': 'BlockdevOptionsGenericFormat',
- 'data': { 'target': 'BlockdevRef' } }
+ 'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap' } }
##
# @BlockdevOptions:
@@ -4259,6 +4268,7 @@
'rbd': 'BlockdevOptionsRbd',
'replication': { 'type': 'BlockdevOptionsReplication',
'if': 'CONFIG_REPLICATION' },
+ 'snapshot-access': 'BlockdevOptionsGenericFormat',
'ssh': 'BlockdevOptionsSsh',
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
diff --git a/qemu-img.c b/qemu-img.c
index 6fe2466032..5dffb3e616 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -42,6 +42,7 @@
#include "qemu/module.h"
#include "qemu/sockets.h"
#include "qemu/units.h"
+#include "qemu/memalign.h"
#include "qom/object_interfaces.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 46593d632d..633b46cdb2 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -21,6 +21,7 @@
#include "qemu/option.h"
#include "qemu/timer.h"
#include "qemu/cutils.h"
+#include "qemu/memalign.h"
#define CMD_NOFILE_OK 0x01
diff --git a/qom/object.c b/qom/object.c
index a27532a6ba..d34608558e 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -16,6 +16,7 @@
#include "qom/object.h"
#include "qom/object_interfaces.h"
#include "qemu/cutils.h"
+#include "qemu/memalign.h"
#include "qapi/visitor.h"
#include "qapi/string-input-visitor.h"
#include "qapi/string-output-visitor.h"
diff --git a/roms/seabios b/roms/seabios
-Subproject 6a62e0cb0dfe9cd28b70547dbea5caf76847c3a
+Subproject d239552ce7220e448ae81f41515138f7b9e3c4d
diff --git a/scripts/qmp/qmp-shell-wrap b/scripts/qmp/qmp-shell-wrap
index 9e94da114f..66846e36d1 100755
--- a/scripts/qmp/qmp-shell-wrap
+++ b/scripts/qmp/qmp-shell-wrap
@@ -4,7 +4,7 @@ import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
-from qemu.qmp import qmp_shell
+from qemu.aqmp import qmp_shell
if __name__ == '__main__':
diff --git a/softmmu/arch_init.c b/softmmu/arch_init.c
index 8919405c7b..79716f959b 100644
--- a/softmmu/arch_init.c
+++ b/softmmu/arch_init.c
@@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
+#include "qemu/module.h"
#include "sysemu/arch_init.h"
#ifdef TARGET_SPARC
@@ -39,3 +40,11 @@ int graphic_depth = 32;
#endif
const uint32_t arch_type = QEMU_ARCH;
+
+void qemu_init_arch_modules(void)
+{
+#ifdef CONFIG_MODULES
+ module_init_info(qemu_modinfo);
+ module_allow_arch(TARGET_NAME);
+#endif
+}
diff --git a/softmmu/cpu-timers.c b/softmmu/cpu-timers.c
index 34ddfa02f1..204d946a17 100644
--- a/softmmu/cpu-timers.c
+++ b/softmmu/cpu-timers.c
@@ -28,7 +28,6 @@
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
-#include "exec/exec-all.h"
#include "sysemu/cpus.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index 035395ae13..e1d84c8ccb 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "monitor/monitor.h"
+#include "qemu/coroutine-tls.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qapi-commands-misc.h"
@@ -32,7 +33,7 @@
#include "qapi/qmp/qerror.h"
#include "exec/gdbstub.h"
#include "sysemu/hw_accel.h"
-#include "exec/exec-all.h"
+#include "exec/cpu-common.h"
#include "qemu/thread.h"
#include "qemu/plugin.h"
#include "sysemu/cpus.h"
@@ -66,6 +67,11 @@
static QemuMutex qemu_global_mutex;
+/*
+ * The chosen accelerator is supposed to register this.
+ */
+static const AccelOpsClass *cpus_accel;
+
bool cpu_is_stopped(CPUState *cpu)
{
return cpu->stopped || !runstate_is_running();
@@ -84,10 +90,12 @@ bool cpu_thread_is_idle(CPUState *cpu)
if (cpu_is_stopped(cpu)) {
return true;
}
- if (!cpu->halted || cpu_has_work(cpu) ||
- kvm_halt_in_kernel() || whpx_apic_in_platform()) {
+ if (!cpu->halted || cpu_has_work(cpu)) {
return false;
}
+ if (cpus_accel->cpu_thread_is_idle) {
+ return cpus_accel->cpu_thread_is_idle(cpu);
+ }
return true;
}
@@ -121,11 +129,6 @@ void hw_error(const char *fmt, ...)
abort();
}
-/*
- * The chosen accelerator is supposed to register this.
- */
-static const AccelOpsClass *cpus_accel;
-
void cpu_synchronize_all_states(void)
{
CPUState *cpu;
@@ -192,7 +195,10 @@ void cpu_synchronize_pre_loadvm(CPUState *cpu)
bool cpus_are_resettable(void)
{
- return cpu_check_are_resettable();
+ if (cpus_accel->cpus_are_resettable) {
+ return cpus_accel->cpus_are_resettable();
+ }
+ return true;
}
int64_t cpus_get_virtual_clock(void)
@@ -473,11 +479,16 @@ bool qemu_in_vcpu_thread(void)
return current_cpu && qemu_cpu_is_self(current_cpu);
}
-static __thread bool iothread_locked = false;
+QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked)
bool qemu_mutex_iothread_locked(void)
{
- return iothread_locked;
+ return get_iothread_locked();
+}
+
+bool qemu_in_main_thread(void)
+{
+ return qemu_mutex_iothread_locked();
}
/*
@@ -490,13 +501,13 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line)
g_assert(!qemu_mutex_iothread_locked());
bql_lock(&qemu_global_mutex, file, line);
- iothread_locked = true;
+ set_iothread_locked(true);
}
void qemu_mutex_unlock_iothread(void)
{
g_assert(qemu_mutex_iothread_locked());
- iothread_locked = false;
+ set_iothread_locked(false);
qemu_mutex_unlock(&qemu_global_mutex);
}
diff --git a/softmmu/globals.c b/softmmu/globals.c
index 7d0fc81183..3ebd718e35 100644
--- a/softmmu/globals.c
+++ b/softmmu/globals.c
@@ -25,8 +25,6 @@
#include "qemu/osdep.h"
#include "exec/cpu-common.h"
#include "hw/display/vga.h"
-#include "hw/i386/pc.h"
-#include "hw/i386/x86.h"
#include "hw/loader.h"
#include "hw/xen/xen.h"
#include "net/net.h"
diff --git a/softmmu/memory_mapping.c b/softmmu/memory_mapping.c
index a62eaa49cc..8320165ea2 100644
--- a/softmmu/memory_mapping.c
+++ b/softmmu/memory_mapping.c
@@ -17,6 +17,7 @@
#include "sysemu/memory_mapping.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
+#include "hw/core/cpu.h"
//#define DEBUG_GUEST_PHYS_REGION_ADD
diff --git a/softmmu/meson.build b/softmmu/meson.build
index 39f766ce7c..8138248661 100644
--- a/softmmu/meson.build
+++ b/softmmu/meson.build
@@ -1,20 +1,9 @@
specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: [files(
'arch_init.c',
- 'balloon.c',
- 'cpus.c',
- 'cpu-throttle.c',
- 'datadir.c',
- 'globals.c',
- 'physmem.c',
'ioport.c',
- 'rtc.c',
- 'runstate.c',
'memory.c',
- 'memory_mapping.c',
+ 'physmem.c',
'qtest.c',
- 'vl.c',
- 'cpu-timers.c',
- 'runstate-action.c',
)])
specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: [files(
@@ -22,9 +11,20 @@ specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: [files(
)])
softmmu_ss.add(files(
+ 'balloon.c',
'bootdevice.c',
+ 'cpus.c',
+ 'cpu-throttle.c',
+ 'cpu-timers.c',
+ 'datadir.c',
'dma-helpers.c',
+ 'globals.c',
+ 'memory_mapping.c',
'qdev-monitor.c',
+ 'rtc.c',
+ 'runstate-action.c',
+ 'runstate.c',
+ 'vl.c',
), sdl, libpmem, libdaxctl)
if have_tpm
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index a13289a594..43ae70fbe2 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -42,6 +42,7 @@
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"
+#include "qemu/memalign.h"
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
@@ -61,7 +62,6 @@
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
-#include "exec/log.h"
#include "qemu/pmem.h"
@@ -3436,11 +3436,11 @@ address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr,
#include "memory_ldst.c.inc"
/* virtual memory access for debug (includes writing to ROM) */
-int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
- void *ptr, target_ulong len, bool is_write)
+int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
+ void *ptr, size_t len, bool is_write)
{
hwaddr phys_addr;
- target_ulong l, page;
+ vaddr l, page;
uint8_t *buf = ptr;
cpu_synchronize_state(cpu);
diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
index 01f3834db5..12fe60c467 100644
--- a/softmmu/qdev-monitor.c
+++ b/softmmu/qdev-monitor.c
@@ -83,6 +83,8 @@ static const QDevAlias qdev_alias_table[] = {
{ "virtio-gpu-device", "virtio-gpu", QEMU_ARCH_VIRTIO_MMIO },
{ "virtio-gpu-ccw", "virtio-gpu", QEMU_ARCH_VIRTIO_CCW },
{ "virtio-gpu-pci", "virtio-gpu", QEMU_ARCH_VIRTIO_PCI },
+ { "virtio-gpu-gl-device", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_MMIO },
+ { "virtio-gpu-gl-pci", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_PCI },
{ "virtio-input-host-device", "virtio-input-host", QEMU_ARCH_VIRTIO_MMIO },
{ "virtio-input-host-ccw", "virtio-input-host", QEMU_ARCH_VIRTIO_CCW },
{ "virtio-input-host-pci", "virtio-input-host", QEMU_ARCH_VIRTIO_PCI },
@@ -971,6 +973,8 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp)
DeviceState *dev;
BlockBackend *blk;
+ GLOBAL_STATE_CODE();
+
dev = find_device_state(id, errp);
if (dev == NULL) {
return NULL;
@@ -1034,6 +1038,13 @@ int qemu_global_option(const char *str)
if (!opts) {
return -1;
}
+ if (!qemu_opt_get(opts, "driver")
+ || !qemu_opt_get(opts, "property")
+ || !qemu_opt_get(opts, "value")) {
+ error_report("options 'driver', 'property', and 'value'"
+ " are required");
+ return -1;
+ }
return 0;
}
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 1fe028800f..0b81f61535 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2815,10 +2815,7 @@ void qemu_init(int argc, char **argv, char **envp)
error_init(argv[0]);
qemu_init_exec_dir(argv[0]);
-#ifdef CONFIG_MODULES
- module_init_info(qemu_modinfo);
- module_allow_arch(TARGET_NAME);
-#endif
+ qemu_init_arch_modules();
qemu_init_subsystems();
diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c
index 504d33aa91..dd18b2cde8 100644
--- a/storage-daemon/qemu-storage-daemon.c
+++ b/storage-daemon/qemu-storage-daemon.c
@@ -93,6 +93,9 @@ static void help(void)
" --chardev <options> configure a character device backend\n"
" (see the qemu(1) man page for possible options)\n"
"\n"
+" --daemonize daemonize the process, and have the parent exit\n"
+" once startup is complete\n"
+"\n"
" --export [type=]nbd,id=<id>,node-name=<node-name>[,name=<export-name>]\n"
" [,writable=on|off][,bitmap=<name>]\n"
" export the specified block node over NBD\n"
@@ -144,6 +147,7 @@ QEMU_HELP_BOTTOM "\n",
enum {
OPTION_BLOCKDEV = 256,
OPTION_CHARDEV,
+ OPTION_DAEMONIZE,
OPTION_EXPORT,
OPTION_MONITOR,
OPTION_NBD_SERVER,
@@ -177,13 +181,30 @@ static int getopt_set_loc(int argc, char **argv, const char *optstring,
return c;
}
-static void process_options(int argc, char *argv[])
+/**
+ * Process QSD command-line arguments.
+ *
+ * This is done in two passes:
+ *
+ * First (@pre_init_pass is true), we do a pass where all global
+ * arguments pertaining to the QSD process (like --help or --daemonize)
+ * are processed. This pass is done before most of the QEMU-specific
+ * initialization steps (e.g. initializing the block layer or QMP), and
+ * so must only process arguments that are not really QEMU-specific.
+ *
+ * Second (@pre_init_pass is false), we (sequentially) process all
+ * QEMU/QSD-specific arguments. Many of these arguments are effectively
+ * translated to QMP commands (like --blockdev for blockdev-add, or
+ * --export for block-export-add).
+ */
+static void process_options(int argc, char *argv[], bool pre_init_pass)
{
int c;
static const struct option long_options[] = {
{"blockdev", required_argument, NULL, OPTION_BLOCKDEV},
{"chardev", required_argument, NULL, OPTION_CHARDEV},
+ {"daemonize", no_argument, NULL, OPTION_DAEMONIZE},
{"export", required_argument, NULL, OPTION_EXPORT},
{"help", no_argument, NULL, 'h'},
{"monitor", required_argument, NULL, OPTION_MONITOR},
@@ -196,11 +217,27 @@ static void process_options(int argc, char *argv[])
};
/*
- * In contrast to the system emulator, options are processed in the order
- * they are given on the command lines. This means that things must be
- * defined first before they can be referenced in another option.
+ * In contrast to the system emulator, QEMU-specific options are processed
+ * in the order they are given on the command line. This means that things
+ * must be defined first before they can be referenced in another option.
*/
+ optind = 1;
while ((c = getopt_set_loc(argc, argv, "-hT:V", long_options)) != -1) {
+ bool handle_option_pre_init;
+
+ /* Should this argument be processed in the pre-init pass? */
+ handle_option_pre_init =
+ c == '?' ||
+ c == 'h' ||
+ c == 'V' ||
+ c == OPTION_DAEMONIZE ||
+ c == OPTION_PIDFILE;
+
+ /* Process every option only in its respective pass */
+ if (pre_init_pass != handle_option_pre_init) {
+ continue;
+ }
+
switch (c) {
case '?':
exit(EXIT_FAILURE);
@@ -246,6 +283,12 @@ static void process_options(int argc, char *argv[])
qemu_opts_del(opts);
break;
}
+ case OPTION_DAEMONIZE:
+ if (os_set_daemonize(true) < 0) {
+ error_report("--daemonize not supported in this build");
+ exit(EXIT_FAILURE);
+ }
+ break;
case OPTION_EXPORT:
{
Visitor *v;
@@ -334,6 +377,10 @@ int main(int argc, char *argv[])
qemu_init_exec_dir(argv[0]);
os_setup_signal_handling();
+ process_options(argc, argv, true);
+
+ os_daemonize();
+
module_call_init(MODULE_INIT_QOM);
module_call_init(MODULE_INIT_TRACE);
qemu_add_opts(&qemu_trace_opts);
@@ -348,7 +395,7 @@ int main(int argc, char *argv[])
qemu_set_log(LOG_TRACE);
qemu_init_main_loop(&error_fatal);
- process_options(argc, argv);
+ process_options(argc, argv, false);
/*
* Write the pid file after creating chardevs, exports, and NBD servers but
@@ -356,6 +403,7 @@ int main(int argc, char *argv[])
* it.
*/
pid_file_init();
+ os_setup_post();
while (!exit_requested) {
main_loop_wait(false);
diff --git a/stubs/iothread-lock-block.c b/stubs/iothread-lock-block.c
new file mode 100644
index 0000000000..c88ed70462
--- /dev/null
+++ b/stubs/iothread-lock-block.c
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+
+bool qemu_in_main_thread(void)
+{
+ return qemu_get_current_aio_context() == qemu_get_aio_context();
+}
+
diff --git a/stubs/meson.build b/stubs/meson.build
index d359cbe1ad..6f80fec761 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -17,6 +17,9 @@ if linux_io_uring.found()
stub_ss.add(files('io_uring.c'))
endif
stub_ss.add(files('iothread-lock.c'))
+if have_block
+ stub_ss.add(files('iothread-lock-block.c'))
+endif
stub_ss.add(files('isa-bus.c'))
stub_ss.add(files('is-daemonized.c'))
if libaio.found()
diff --git a/target/alpha/cpu-qom.h b/target/alpha/cpu-qom.h
index 7bb9173c57..1f200724b6 100644
--- a/target/alpha/cpu-qom.h
+++ b/target/alpha/cpu-qom.h
@@ -25,8 +25,7 @@
#define TYPE_ALPHA_CPU "alpha-cpu"
-OBJECT_DECLARE_TYPE(AlphaCPU, AlphaCPUClass,
- ALPHA_CPU)
+OBJECT_DECLARE_CPU_TYPE(AlphaCPU, AlphaCPUClass, ALPHA_CPU)
/**
* AlphaCPUClass:
diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
index e819211503..58f00b7814 100644
--- a/target/alpha/cpu.h
+++ b/target/alpha/cpu.h
@@ -197,9 +197,7 @@ enum {
#define MMU_USER_IDX 1
#define MMU_PHYS_IDX 2
-typedef struct CPUAlphaState CPUAlphaState;
-
-struct CPUAlphaState {
+typedef struct CPUArchState {
uint64_t ir[31];
float64 fir[31];
uint64_t pc;
@@ -251,7 +249,7 @@ struct CPUAlphaState {
uint32_t features;
uint32_t amask;
int implver;
-};
+} CPUAlphaState;
/**
* AlphaCPU:
@@ -259,7 +257,7 @@ struct CPUAlphaState {
*
* An Alpha CPU.
*/
-struct AlphaCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -285,9 +283,6 @@ int alpha_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
#define cpu_list alpha_cpu_list
-typedef CPUAlphaState CPUArchState;
-typedef AlphaCPU ArchCPU;
-
#include "exec/cpu-all.h"
enum {
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index ca78a0faed..66768ab47a 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -20,7 +20,6 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "sysemu/cpus.h"
-#include "sysemu/cpu-timers.h"
#include "disas/disas.h"
#include "qemu/host-utils.h"
#include "exec/exec-all.h"
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
index a22bd506d0..64c44cef2d 100644
--- a/target/arm/cpu-qom.h
+++ b/target/arm/cpu-qom.h
@@ -27,8 +27,7 @@ struct arm_boot_info;
#define TYPE_ARM_CPU "arm-cpu"
-OBJECT_DECLARE_TYPE(ARMCPU, ARMCPUClass,
- ARM_CPU)
+OBJECT_DECLARE_CPU_TYPE(ARMCPU, ARMCPUClass, ARM_CPU)
#define TYPE_ARM_MAX_CPU "max-" TYPE_ARM_CPU
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 7091684a16..185d4e774d 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1392,6 +1392,12 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp)
error_propagate(errp, local_err);
return;
}
+
+ arm_cpu_lpa2_finalize(cpu, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ return;
+ }
}
if (kvm_enabled()) {
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 24d9fff170..157f214cce 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -204,10 +204,12 @@ typedef struct {
# define ARM_MAX_VQ 16
void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp);
void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp);
+void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp);
#else
# define ARM_MAX_VQ 1
static inline void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) { }
static inline void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) { }
+static inline void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp) { }
#endif
typedef struct ARMVectorReg {
@@ -232,7 +234,7 @@ typedef struct CPUARMTBFlags {
target_ulong flags2;
} CPUARMTBFlags;
-typedef struct CPUARMState {
+typedef struct CPUArchState {
/* Regs for current mode. */
uint32_t regs[16];
@@ -774,7 +776,7 @@ typedef struct ARMISARegisters ARMISARegisters;
*
* An ARM CPU core.
*/
-struct ARMCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -975,10 +977,11 @@ struct ARMCPU {
/*
* Intermediate values used during property parsing.
- * Once finalized, the values should be read from ID_AA64ISAR1.
+ * Once finalized, the values should be read from ID_AA64*.
*/
bool prop_pauth;
bool prop_pauth_impdef;
+ bool prop_lpa2;
/* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
uint32_t dcz_blocksize;
@@ -3410,9 +3413,6 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
}
}
-typedef CPUARMState CPUArchState;
-typedef ARMCPU ArchCPU;
-
#include "exec/cpu-all.h"
/*
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 2fdc16bf18..eb44c05822 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -688,6 +688,29 @@ void aarch64_add_pauth_properties(Object *obj)
}
}
+static Property arm_cpu_lpa2_property =
+ DEFINE_PROP_BOOL("lpa2", ARMCPU, prop_lpa2, true);
+
+void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp)
+{
+ uint64_t t;
+
+ /*
+ * We only install the property for tcg -cpu max; this is the
+ * only situation in which the cpu field can be true.
+ */
+ if (!cpu->prop_lpa2) {
+ return;
+ }
+
+ t = cpu->isar.id_aa64mmfr0;
+ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 2); /* 16k pages w/ LPA2 */
+ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4, 1); /* 4k pages w/ LPA2 */
+ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 3); /* 16k stage2 w/ LPA2 */
+ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 3); /* 4k stage2 w/ LPA2 */
+ cpu->isar.id_aa64mmfr0 = t;
+}
+
static void aarch64_host_initfn(Object *obj)
{
#if defined(CONFIG_KVM)
@@ -897,6 +920,7 @@ static void aarch64_max_initfn(Object *obj)
aarch64_add_sve_properties(obj);
object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_max_vq,
cpu_max_set_sve_max_vq, NULL, NULL);
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property);
}
static void aarch64_a64fx_initfn(Object *obj)
diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h
index ea238cff83..9a9d1a0bf5 100644
--- a/target/arm/hvf_arm.h
+++ b/target/arm/hvf_arm.h
@@ -13,6 +13,6 @@
#include "cpu.h"
-void hvf_arm_set_cpu_features_from_host(struct ARMCPU *cpu);
+void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu);
#endif
diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c
index 3854dd3516..384604c009 100644
--- a/target/arm/translate-neon.c
+++ b/target/arm/translate-neon.c
@@ -657,21 +657,24 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
/* Catch the UNDEF cases. This is unavoidably a bit messy. */
switch (nregs) {
case 1:
+ if (a->stride != 1) {
+ return false;
+ }
if (((a->align & (1 << a->size)) != 0) ||
(a->size == 2 && (a->align == 1 || a->align == 2))) {
return false;
}
break;
- case 3:
- if ((a->align & 1) != 0) {
- return false;
- }
- /* fall through */
case 2:
if (a->size == 2 && (a->align & 2) != 0) {
return false;
}
break;
+ case 3:
+ if (a->align != 0) {
+ return false;
+ }
+ break;
case 4:
if (a->size == 2 && a->align == 3) {
return false;
diff --git a/target/avr/cpu-qom.h b/target/avr/cpu-qom.h
index 14e5b3ce72..32a1c762e6 100644
--- a/target/avr/cpu-qom.h
+++ b/target/avr/cpu-qom.h
@@ -26,8 +26,7 @@
#define TYPE_AVR_CPU "avr-cpu"
-OBJECT_DECLARE_TYPE(AVRCPU, AVRCPUClass,
- AVR_CPU)
+OBJECT_DECLARE_CPU_TYPE(AVRCPU, AVRCPUClass, AVR_CPU)
/**
* AVRCPUClass:
diff --git a/target/avr/cpu.h b/target/avr/cpu.h
index dceacf3cd7..55497f851d 100644
--- a/target/avr/cpu.h
+++ b/target/avr/cpu.h
@@ -108,9 +108,7 @@ typedef enum AVRFeature {
AVR_FEATURE_RAMPZ,
} AVRFeature;
-typedef struct CPUAVRState CPUAVRState;
-
-struct CPUAVRState {
+typedef struct CPUArchState {
uint32_t pc_w; /* 0x003fffff up to 22 bits */
uint32_t sregC; /* 0x00000001 1 bit */
@@ -137,7 +135,7 @@ struct CPUAVRState {
bool fullacc; /* CPU/MEM if true MEM only otherwise */
uint64_t features;
-};
+} CPUAVRState;
/**
* AVRCPU:
@@ -145,14 +143,14 @@ struct CPUAVRState {
*
* A AVR CPU.
*/
-typedef struct AVRCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
CPUNegativeOffsetState neg;
CPUAVRState env;
-} AVRCPU;
+};
extern const struct VMStateDescription vms_avr_cpu;
@@ -247,9 +245,6 @@ bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
MMUAccessType access_type, int mmu_idx,
bool probe, uintptr_t retaddr);
-typedef CPUAVRState CPUArchState;
-typedef AVRCPU ArchCPU;
-
#include "exec/cpu-all.h"
#endif /* !defined (QEMU_AVR_CPU_H) */
diff --git a/target/cris/cpu-qom.h b/target/cris/cpu-qom.h
index 2596edc7e3..71e8af0e70 100644
--- a/target/cris/cpu-qom.h
+++ b/target/cris/cpu-qom.h
@@ -25,8 +25,7 @@
#define TYPE_CRIS_CPU "cris-cpu"
-OBJECT_DECLARE_TYPE(CRISCPU, CRISCPUClass,
- CRIS_CPU)
+OBJECT_DECLARE_CPU_TYPE(CRISCPU, CRISCPUClass, CRIS_CPU)
/**
* CRISCPUClass:
diff --git a/target/cris/cpu.h b/target/cris/cpu.h
index b445b194ea..e6776f25b1 100644
--- a/target/cris/cpu.h
+++ b/target/cris/cpu.h
@@ -105,7 +105,7 @@ typedef struct {
uint32_t lo;
} TLBSet;
-typedef struct CPUCRISState {
+typedef struct CPUArchState {
uint32_t regs[16];
/* P0 - P15 are referred to as special registers in the docs. */
uint32_t pregs[16];
@@ -173,7 +173,7 @@ typedef struct CPUCRISState {
*
* A CRIS CPU.
*/
-struct CRISCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -265,9 +265,6 @@ static inline int cpu_mmu_index (CPUCRISState *env, bool ifetch)
#define SFR_RW_MM_TLB_LO env->pregs[PR_SRS]][5
#define SFR_RW_MM_TLB_HI env->pregs[PR_SRS]][6
-typedef CPUCRISState CPUArchState;
-typedef CRISCPU ArchCPU;
-
#include "exec/cpu-all.h"
static inline void cpu_get_tb_cpu_state(CPUCRISState *env, target_ulong *pc,
diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h
index 58a0d3870b..2a65a57bab 100644
--- a/target/hexagon/cpu.h
+++ b/target/hexagon/cpu.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -18,14 +18,13 @@
#ifndef HEXAGON_CPU_H
#define HEXAGON_CPU_H
-/* Forward declaration needed by some of the header files */
-typedef struct CPUHexagonState CPUHexagonState;
-
#include "fpu/softfloat-types.h"
#include "exec/cpu-defs.h"
#include "hex_regs.h"
#include "mmvec/mmvec.h"
+#include "qom/object.h"
+#include "hw/core/cpu.h"
#define NUM_PREGS 4
#define TOTAL_PER_THREAD_REGS 64
@@ -75,7 +74,7 @@ typedef struct {
/* Maximum number of vector temps in a packet */
#define VECTOR_TEMPS_MAX 4
-struct CPUHexagonState {
+typedef struct CPUArchState {
target_ulong gpr[TOTAL_PER_THREAD_REGS];
target_ulong pred[NUM_PREGS];
target_ulong branch_taken;
@@ -129,14 +128,9 @@ struct CPUHexagonState {
target_ulong vstore_pending[VSTORES_MAX];
bool vtcm_pending;
VTCMStoreLog vtcm_log;
-};
+} CPUHexagonState;
-#define HEXAGON_CPU_CLASS(klass) \
- OBJECT_CLASS_CHECK(HexagonCPUClass, (klass), TYPE_HEXAGON_CPU)
-#define HEXAGON_CPU(obj) \
- OBJECT_CHECK(HexagonCPU, (obj), TYPE_HEXAGON_CPU)
-#define HEXAGON_CPU_GET_CLASS(obj) \
- OBJECT_GET_CLASS(HexagonCPUClass, (obj), TYPE_HEXAGON_CPU)
+OBJECT_DECLARE_CPU_TYPE(HexagonCPU, HexagonCPUClass, HEXAGON_CPU)
typedef struct HexagonCPUClass {
/*< private >*/
@@ -146,7 +140,7 @@ typedef struct HexagonCPUClass {
DeviceReset parent_reset;
} HexagonCPUClass;
-typedef struct HexagonCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -155,7 +149,7 @@ typedef struct HexagonCPU {
bool lldb_compat;
target_ulong lldb_stack_adjust;
-} HexagonCPU;
+};
#include "cpu_bits.h"
@@ -180,7 +174,6 @@ static inline int cpu_mmu_index(CPUHexagonState *env, bool ifetch)
#endif
}
-typedef struct CPUHexagonState CPUArchState;
typedef HexagonCPU ArchCPU;
void hexagon_translate_init(void);
diff --git a/target/hppa/cpu-qom.h b/target/hppa/cpu-qom.h
index d424f88370..b96e0318c7 100644
--- a/target/hppa/cpu-qom.h
+++ b/target/hppa/cpu-qom.h
@@ -25,8 +25,7 @@
#define TYPE_HPPA_CPU "hppa-cpu"
-OBJECT_DECLARE_TYPE(HPPACPU, HPPACPUClass,
- HPPA_CPU)
+OBJECT_DECLARE_CPU_TYPE(HPPACPU, HPPACPUClass, HPPA_CPU)
/**
* HPPACPUClass:
diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h
index 93c119532a..4cc936b6bf 100644
--- a/target/hppa/cpu.h
+++ b/target/hppa/cpu.h
@@ -138,8 +138,6 @@
#define CR_IPSW 22
#define CR_EIRR 23
-typedef struct CPUHPPAState CPUHPPAState;
-
#if TARGET_REGISTER_BITS == 32
typedef uint32_t target_ureg;
typedef int32_t target_sreg;
@@ -168,7 +166,7 @@ typedef struct {
unsigned access_id : 16;
} hppa_tlb_entry;
-struct CPUHPPAState {
+typedef struct CPUArchState {
target_ureg gr[32];
uint64_t fr[32];
uint64_t sr[8]; /* stored shifted into place for gva */
@@ -207,7 +205,7 @@ struct CPUHPPAState {
/* ??? We should use a more intelligent data structure. */
hppa_tlb_entry tlb[HPPA_TLB_ENTRIES];
uint32_t tlb_last;
-};
+} CPUHPPAState;
/**
* HPPACPU:
@@ -215,7 +213,7 @@ struct CPUHPPAState {
*
* An HPPA CPU.
*/
-struct HPPACPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -225,10 +223,6 @@ struct HPPACPU {
QEMUTimer *alarm_timer;
};
-
-typedef CPUHPPAState CPUArchState;
-typedef HPPACPU ArchCPU;
-
#include "exec/cpu-all.h"
static inline int cpu_mmu_index(CPUHPPAState *env, bool ifetch)
diff --git a/target/i386/cpu-qom.h b/target/i386/cpu-qom.h
index f9923cee04..c557a522e1 100644
--- a/target/i386/cpu-qom.h
+++ b/target/i386/cpu-qom.h
@@ -30,8 +30,7 @@
#define TYPE_X86_CPU "i386-cpu"
#endif
-OBJECT_DECLARE_TYPE(X86CPU, X86CPUClass,
- X86_CPU)
+OBJECT_DECLARE_CPU_TYPE(X86CPU, X86CPUClass, X86_CPU)
typedef struct X86CPUModel X86CPUModel;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index e69ab5dd78..e11734ba86 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1431,7 +1431,7 @@ typedef struct HVFX86LazyFlags {
target_ulong auxbits;
} HVFX86LazyFlags;
-typedef struct CPUX86State {
+typedef struct CPUArchState {
/* standard registers */
target_ulong regs[CPU_NB_REGS];
target_ulong eip;
@@ -1707,7 +1707,7 @@ struct kvm_msrs;
*
* An x86 CPU.
*/
-struct X86CPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -2074,9 +2074,6 @@ static inline int cpu_mmu_index_kernel(CPUX86State *env)
#define CC_SRC2 (env->cc_src2)
#define CC_OP (env->cc_op)
-typedef CPUX86State CPUArchState;
-typedef X86CPU ArchCPU;
-
#include "exec/cpu-all.h"
#include "svm.h"
diff --git a/target/i386/hax/hax-all.c b/target/i386/hax/hax-all.c
index bf65ed6fa9..81f665e212 100644
--- a/target/i386/hax/hax-all.c
+++ b/target/i386/hax/hax-all.c
@@ -49,18 +49,13 @@ const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/* Minimum HAX kernel version */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */
-static bool hax_allowed;
+bool hax_allowed;
struct hax_state hax_global;
static void hax_vcpu_sync_state(CPUArchState *env, int modified);
static int hax_arch_get_registers(CPUArchState *env);
-int hax_enabled(void)
-{
- return hax_allowed;
-}
-
int valid_hax_tunnel_size(uint16_t size)
{
return size >= sizeof(struct hax_tunnel);
@@ -227,7 +222,7 @@ int hax_init_vcpu(CPUState *cpu)
cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
cpu->vcpu_dirty = true;
- qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));
+ qemu_register_reset(hax_reset_vcpu_state, cpu->env_ptr);
return ret;
}
@@ -674,7 +669,7 @@ void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
int hax_smp_cpu_exec(CPUState *cpu)
{
- CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
+ CPUArchState *env = cpu->env_ptr;
int fatal;
int ret;
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 4ba6e82fab..fc12c02fb2 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -49,6 +49,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"
+#include "qemu/memalign.h"
#include "sysemu/hvf.h"
#include "sysemu/hvf_int.h"
diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c
index 7c8203b21f..050428795b 100644
--- a/target/i386/hvf/x86_emu.c
+++ b/target/i386/hvf/x86_emu.c
@@ -171,12 +171,12 @@ void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size)
}
}
-static bool is_host_reg(struct CPUX86State *env, target_ulong ptr)
+static bool is_host_reg(CPUX86State *env, target_ulong ptr)
{
return (ptr - (target_ulong)&env->regs[0]) < sizeof(env->regs);
}
-void write_val_ext(struct CPUX86State *env, target_ulong ptr, target_ulong val, int size)
+void write_val_ext(CPUX86State *env, target_ulong ptr, target_ulong val, int size)
{
if (is_host_reg(env, ptr)) {
write_val_to_reg(ptr, val, size);
@@ -185,14 +185,14 @@ void write_val_ext(struct CPUX86State *env, target_ulong ptr, target_ulong val,
vmx_write_mem(env_cpu(env), ptr, &val, size);
}
-uint8_t *read_mmio(struct CPUX86State *env, target_ulong ptr, int bytes)
+uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes)
{
vmx_read_mem(env_cpu(env), env->hvf_mmio_buf, ptr, bytes);
return env->hvf_mmio_buf;
}
-target_ulong read_val_ext(struct CPUX86State *env, target_ulong ptr, int size)
+target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size)
{
target_ulong val;
uint8_t *mmio_ptr;
@@ -222,7 +222,7 @@ target_ulong read_val_ext(struct CPUX86State *env, target_ulong ptr, int size)
return val;
}
-static void fetch_operands(struct CPUX86State *env, struct x86_decode *decode,
+static void fetch_operands(CPUX86State *env, struct x86_decode *decode,
int n, bool val_op0, bool val_op1, bool val_op2)
{
int i;
@@ -261,7 +261,7 @@ static void fetch_operands(struct CPUX86State *env, struct x86_decode *decode,
}
}
-static void exec_mov(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_mov(CPUX86State *env, struct x86_decode *decode)
{
fetch_operands(env, decode, 2, false, true, false);
write_val_ext(env, decode->op[0].ptr, decode->op[1].val,
@@ -270,49 +270,49 @@ static void exec_mov(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_add(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_add(CPUX86State *env, struct x86_decode *decode)
{
EXEC_2OP_FLAGS_CMD(env, decode, +, SET_FLAGS_OSZAPC_ADD, true);
env->eip += decode->len;
}
-static void exec_or(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_or(CPUX86State *env, struct x86_decode *decode)
{
EXEC_2OP_FLAGS_CMD(env, decode, |, SET_FLAGS_OSZAPC_LOGIC, true);
env->eip += decode->len;
}
-static void exec_adc(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_adc(CPUX86State *env, struct x86_decode *decode)
{
EXEC_2OP_FLAGS_CMD(env, decode, +get_CF(env)+, SET_FLAGS_OSZAPC_ADD, true);
env->eip += decode->len;
}
-static void exec_sbb(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_sbb(CPUX86State *env, struct x86_decode *decode)
{
EXEC_2OP_FLAGS_CMD(env, decode, -get_CF(env)-, SET_FLAGS_OSZAPC_SUB, true);
env->eip += decode->len;
}
-static void exec_and(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_and(CPUX86State *env, struct x86_decode *decode)
{
EXEC_2OP_FLAGS_CMD(env, decode, &, SET_FLAGS_OSZAPC_LOGIC, true);
env->eip += decode->len;
}
-static void exec_sub(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_sub(CPUX86State *env, struct x86_decode *decode)
{
EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, true);
env->eip += decode->len;
}
-static void exec_xor(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_xor(CPUX86State *env, struct x86_decode *decode)
{
EXEC_2OP_FLAGS_CMD(env, decode, ^, SET_FLAGS_OSZAPC_LOGIC, true);
env->eip += decode->len;
}
-static void exec_neg(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_neg(CPUX86State *env, struct x86_decode *decode)
{
/*EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false);*/
int32_t val;
@@ -335,13 +335,13 @@ static void exec_neg(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_cmp(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_cmp(CPUX86State *env, struct x86_decode *decode)
{
EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false);
env->eip += decode->len;
}
-static void exec_inc(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_inc(CPUX86State *env, struct x86_decode *decode)
{
decode->op[1].type = X86_VAR_IMMEDIATE;
decode->op[1].val = 0;
@@ -351,7 +351,7 @@ static void exec_inc(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_dec(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_dec(CPUX86State *env, struct x86_decode *decode)
{
decode->op[1].type = X86_VAR_IMMEDIATE;
decode->op[1].val = 0;
@@ -360,13 +360,13 @@ static void exec_dec(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_tst(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_tst(CPUX86State *env, struct x86_decode *decode)
{
EXEC_2OP_FLAGS_CMD(env, decode, &, SET_FLAGS_OSZAPC_LOGIC, false);
env->eip += decode->len;
}
-static void exec_not(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_not(CPUX86State *env, struct x86_decode *decode)
{
fetch_operands(env, decode, 1, true, false, false);
@@ -375,7 +375,7 @@ static void exec_not(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-void exec_movzx(struct CPUX86State *env, struct x86_decode *decode)
+void exec_movzx(CPUX86State *env, struct x86_decode *decode)
{
int src_op_size;
int op_size = decode->operand_size;
@@ -395,7 +395,7 @@ void exec_movzx(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_out(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_out(CPUX86State *env, struct x86_decode *decode)
{
switch (decode->opcode[0]) {
case 0xe6:
@@ -419,7 +419,7 @@ static void exec_out(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_in(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_in(CPUX86State *env, struct x86_decode *decode)
{
target_ulong val = 0;
switch (decode->opcode[0]) {
@@ -455,7 +455,7 @@ static void exec_in(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static inline void string_increment_reg(struct CPUX86State *env, int reg,
+static inline void string_increment_reg(CPUX86State *env, int reg,
struct x86_decode *decode)
{
target_ulong val = read_reg(env, reg, decode->addressing_size);
@@ -467,8 +467,8 @@ static inline void string_increment_reg(struct CPUX86State *env, int reg,
write_reg(env, reg, val, decode->addressing_size);
}
-static inline void string_rep(struct CPUX86State *env, struct x86_decode *decode,
- void (*func)(struct CPUX86State *env,
+static inline void string_rep(CPUX86State *env, struct x86_decode *decode,
+ void (*func)(CPUX86State *env,
struct x86_decode *ins), int rep)
{
target_ulong rcx = read_reg(env, R_ECX, decode->addressing_size);
@@ -484,7 +484,7 @@ static inline void string_rep(struct CPUX86State *env, struct x86_decode *decode
}
}
-static void exec_ins_single(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_ins_single(CPUX86State *env, struct x86_decode *decode)
{
target_ulong addr = linear_addr_size(env_cpu(env), RDI(env),
decode->addressing_size, R_ES);
@@ -497,7 +497,7 @@ static void exec_ins_single(struct CPUX86State *env, struct x86_decode *decode)
string_increment_reg(env, R_EDI, decode);
}
-static void exec_ins(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_ins(CPUX86State *env, struct x86_decode *decode)
{
if (decode->rep) {
string_rep(env, decode, exec_ins_single, 0);
@@ -508,7 +508,7 @@ static void exec_ins(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_outs_single(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_outs_single(CPUX86State *env, struct x86_decode *decode)
{
target_ulong addr = decode_linear_addr(env, decode, RSI(env), R_DS);
@@ -520,7 +520,7 @@ static void exec_outs_single(struct CPUX86State *env, struct x86_decode *decode)
string_increment_reg(env, R_ESI, decode);
}
-static void exec_outs(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_outs(CPUX86State *env, struct x86_decode *decode)
{
if (decode->rep) {
string_rep(env, decode, exec_outs_single, 0);
@@ -531,7 +531,7 @@ static void exec_outs(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_movs_single(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_movs_single(CPUX86State *env, struct x86_decode *decode)
{
target_ulong src_addr;
target_ulong dst_addr;
@@ -548,7 +548,7 @@ static void exec_movs_single(struct CPUX86State *env, struct x86_decode *decode)
string_increment_reg(env, R_EDI, decode);
}
-static void exec_movs(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_movs(CPUX86State *env, struct x86_decode *decode)
{
if (decode->rep) {
string_rep(env, decode, exec_movs_single, 0);
@@ -559,7 +559,7 @@ static void exec_movs(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_cmps_single(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_cmps_single(CPUX86State *env, struct x86_decode *decode)
{
target_ulong src_addr;
target_ulong dst_addr;
@@ -579,7 +579,7 @@ static void exec_cmps_single(struct CPUX86State *env, struct x86_decode *decode)
string_increment_reg(env, R_EDI, decode);
}
-static void exec_cmps(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_cmps(CPUX86State *env, struct x86_decode *decode)
{
if (decode->rep) {
string_rep(env, decode, exec_cmps_single, decode->rep);
@@ -590,7 +590,7 @@ static void exec_cmps(struct CPUX86State *env, struct x86_decode *decode)
}
-static void exec_stos_single(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_stos_single(CPUX86State *env, struct x86_decode *decode)
{
target_ulong addr;
target_ulong val;
@@ -604,7 +604,7 @@ static void exec_stos_single(struct CPUX86State *env, struct x86_decode *decode)
}
-static void exec_stos(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_stos(CPUX86State *env, struct x86_decode *decode)
{
if (decode->rep) {
string_rep(env, decode, exec_stos_single, 0);
@@ -615,7 +615,7 @@ static void exec_stos(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_scas_single(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_scas_single(CPUX86State *env, struct x86_decode *decode)
{
target_ulong addr;
@@ -628,7 +628,7 @@ static void exec_scas_single(struct CPUX86State *env, struct x86_decode *decode)
string_increment_reg(env, R_EDI, decode);
}
-static void exec_scas(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_scas(CPUX86State *env, struct x86_decode *decode)
{
decode->op[0].type = X86_VAR_REG;
decode->op[0].reg = R_EAX;
@@ -641,7 +641,7 @@ static void exec_scas(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_lods_single(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_lods_single(CPUX86State *env, struct x86_decode *decode)
{
target_ulong addr;
target_ulong val = 0;
@@ -653,7 +653,7 @@ static void exec_lods_single(struct CPUX86State *env, struct x86_decode *decode)
string_increment_reg(env, R_ESI, decode);
}
-static void exec_lods(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_lods(CPUX86State *env, struct x86_decode *decode)
{
if (decode->rep) {
string_rep(env, decode, exec_lods_single, 0);
@@ -760,7 +760,7 @@ void simulate_rdmsr(struct CPUState *cpu)
RDX(env) = (uint32_t)(val >> 32);
}
-static void exec_rdmsr(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_rdmsr(CPUX86State *env, struct x86_decode *decode)
{
simulate_rdmsr(env_cpu(env));
env->eip += decode->len;
@@ -855,7 +855,7 @@ void simulate_wrmsr(struct CPUState *cpu)
printf("write msr %llx\n", RCX(cpu));*/
}
-static void exec_wrmsr(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_wrmsr(CPUX86State *env, struct x86_decode *decode)
{
simulate_wrmsr(env_cpu(env));
env->eip += decode->len;
@@ -865,7 +865,7 @@ static void exec_wrmsr(struct CPUX86State *env, struct x86_decode *decode)
* flag:
* 0 - bt, 1 - btc, 2 - bts, 3 - btr
*/
-static void do_bt(struct CPUX86State *env, struct x86_decode *decode, int flag)
+static void do_bt(CPUX86State *env, struct x86_decode *decode, int flag)
{
int32_t displacement;
uint8_t index;
@@ -911,31 +911,31 @@ static void do_bt(struct CPUX86State *env, struct x86_decode *decode, int flag)
set_CF(env, cf);
}
-static void exec_bt(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_bt(CPUX86State *env, struct x86_decode *decode)
{
do_bt(env, decode, 0);
env->eip += decode->len;
}
-static void exec_btc(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_btc(CPUX86State *env, struct x86_decode *decode)
{
do_bt(env, decode, 1);
env->eip += decode->len;
}
-static void exec_btr(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_btr(CPUX86State *env, struct x86_decode *decode)
{
do_bt(env, decode, 3);
env->eip += decode->len;
}
-static void exec_bts(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_bts(CPUX86State *env, struct x86_decode *decode)
{
do_bt(env, decode, 2);
env->eip += decode->len;
}
-void exec_shl(struct CPUX86State *env, struct x86_decode *decode)
+void exec_shl(CPUX86State *env, struct x86_decode *decode)
{
uint8_t count;
int of = 0, cf = 0;
@@ -1022,7 +1022,7 @@ void exec_movsx(CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-void exec_ror(struct CPUX86State *env, struct x86_decode *decode)
+void exec_ror(CPUX86State *env, struct x86_decode *decode)
{
uint8_t count;
@@ -1100,7 +1100,7 @@ void exec_ror(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-void exec_rol(struct CPUX86State *env, struct x86_decode *decode)
+void exec_rol(CPUX86State *env, struct x86_decode *decode)
{
uint8_t count;
@@ -1182,7 +1182,7 @@ void exec_rol(struct CPUX86State *env, struct x86_decode *decode)
}
-void exec_rcl(struct CPUX86State *env, struct x86_decode *decode)
+void exec_rcl(CPUX86State *env, struct x86_decode *decode)
{
uint8_t count;
int of = 0, cf = 0;
@@ -1267,7 +1267,7 @@ void exec_rcl(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-void exec_rcr(struct CPUX86State *env, struct x86_decode *decode)
+void exec_rcr(CPUX86State *env, struct x86_decode *decode)
{
uint8_t count;
int of = 0, cf = 0;
@@ -1342,7 +1342,7 @@ void exec_rcr(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_xchg(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_xchg(CPUX86State *env, struct x86_decode *decode)
{
fetch_operands(env, decode, 2, true, true, false);
@@ -1354,7 +1354,7 @@ static void exec_xchg(struct CPUX86State *env, struct x86_decode *decode)
env->eip += decode->len;
}
-static void exec_xadd(struct CPUX86State *env, struct x86_decode *decode)
+static void exec_xadd(CPUX86State *env, struct x86_decode *decode)
{
EXEC_2OP_FLAGS_CMD(env, decode, +, SET_FLAGS_OSZAPC_ADD, true);
write_val_ext(env, decode->op[1].ptr, decode->op[0].val,
@@ -1365,7 +1365,7 @@ static void exec_xadd(struct CPUX86State *env, struct x86_decode *decode)
static struct cmd_handler {
enum x86_decode_cmd cmd;
- void (*handler)(struct CPUX86State *env, struct x86_decode *ins);
+ void (*handler)(CPUX86State *env, struct x86_decode *ins);
} handlers[] = {
{X86_DECODE_CMD_INVL, NULL,},
{X86_DECODE_CMD_MOV, exec_mov},
@@ -1465,7 +1465,7 @@ void store_regs(struct CPUState *cpu)
macvm_set_rip(cpu, env->eip);
}
-bool exec_instruction(struct CPUX86State *env, struct x86_decode *ins)
+bool exec_instruction(CPUX86State *env, struct x86_decode *ins)
{
/*if (hvf_vcpu_id(cpu))
printf("%d, %llx: exec_instruction %s\n", hvf_vcpu_id(cpu), env->eip,
diff --git a/target/i386/hvf/x86_emu.h b/target/i386/hvf/x86_emu.h
index 233f7b8daa..640da90b30 100644
--- a/target/i386/hvf/x86_emu.h
+++ b/target/i386/hvf/x86_emu.h
@@ -24,7 +24,7 @@
#include "cpu.h"
void init_emu(void);
-bool exec_instruction(struct CPUX86State *env, struct x86_decode *ins);
+bool exec_instruction(CPUX86State *env, struct x86_decode *ins);
void load_regs(struct CPUState *cpu);
void store_regs(struct CPUState *cpu);
@@ -36,15 +36,15 @@ target_ulong read_reg(CPUX86State *env, int reg, int size);
void write_reg(CPUX86State *env, int reg, target_ulong val, int size);
target_ulong read_val_from_reg(target_ulong reg_ptr, int size);
void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size);
-void write_val_ext(struct CPUX86State *env, target_ulong ptr, target_ulong val, int size);
-uint8_t *read_mmio(struct CPUX86State *env, target_ulong ptr, int bytes);
-target_ulong read_val_ext(struct CPUX86State *env, target_ulong ptr, int size);
+void write_val_ext(CPUX86State *env, target_ulong ptr, target_ulong val, int size);
+uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes);
+target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size);
-void exec_movzx(struct CPUX86State *env, struct x86_decode *decode);
-void exec_shl(struct CPUX86State *env, struct x86_decode *decode);
-void exec_movsx(struct CPUX86State *env, struct x86_decode *decode);
-void exec_ror(struct CPUX86State *env, struct x86_decode *decode);
-void exec_rol(struct CPUX86State *env, struct x86_decode *decode);
-void exec_rcl(struct CPUX86State *env, struct x86_decode *decode);
-void exec_rcr(struct CPUX86State *env, struct x86_decode *decode);
+void exec_movzx(CPUX86State *env, struct x86_decode *decode);
+void exec_shl(CPUX86State *env, struct x86_decode *decode);
+void exec_movsx(CPUX86State *env, struct x86_decode *decode);
+void exec_ror(CPUX86State *env, struct x86_decode *decode);
+void exec_rol(CPUX86State *env, struct x86_decode *decode);
+void exec_rcl(CPUX86State *env, struct x86_decode *decode);
+void exec_rcr(CPUX86State *env, struct x86_decode *decode);
#endif
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 2c8feb4a6f..83d0988302 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -37,6 +37,7 @@
#include "qemu/main-loop.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
+#include "qemu/memalign.h"
#include "hw/i386/x86.h"
#include "hw/i386/apic.h"
#include "hw/i386/apic_internal.h"
diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
index 9af261eea3..b97d091a50 100644
--- a/target/i386/nvmm/nvmm-all.c
+++ b/target/i386/nvmm/nvmm-all.c
@@ -85,7 +85,7 @@ nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg)
static void
nvmm_set_registers(CPUState *cpu)
{
- struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
+ CPUX86State *env = cpu->env_ptr;
struct nvmm_machine *mach = get_nvmm_mach();
struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
@@ -222,7 +222,7 @@ nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg)
static void
nvmm_get_registers(CPUState *cpu)
{
- struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
+ CPUX86State *env = cpu->env_ptr;
struct nvmm_machine *mach = get_nvmm_mach();
struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
@@ -347,7 +347,7 @@ nvmm_get_registers(CPUState *cpu)
static bool
nvmm_can_take_int(CPUState *cpu)
{
- struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
+ CPUX86State *env = cpu->env_ptr;
struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
struct nvmm_machine *mach = get_nvmm_mach();
@@ -394,7 +394,7 @@ nvmm_can_take_nmi(CPUState *cpu)
static void
nvmm_vcpu_pre_run(CPUState *cpu)
{
- struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
+ CPUX86State *env = cpu->env_ptr;
struct nvmm_machine *mach = get_nvmm_mach();
struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
@@ -480,7 +480,7 @@ static void
nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit)
{
struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
- struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
+ CPUX86State *env = cpu->env_ptr;
X86CPU *x86_cpu = X86_CPU(cpu);
uint64_t tpr;
@@ -652,7 +652,7 @@ static int
nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu,
struct nvmm_vcpu_exit *exit)
{
- struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
+ CPUX86State *env = cpu->env_ptr;
int ret = 0;
qemu_mutex_lock_iothread();
@@ -685,7 +685,7 @@ nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
static int
nvmm_vcpu_loop(CPUState *cpu)
{
- struct CPUX86State *env = (CPUArchState *)cpu->env_ptr;
+ CPUX86State *env = cpu->env_ptr;
struct nvmm_machine *mach = get_nvmm_mach();
struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu);
struct nvmm_vcpu *vcpu = &qcpu->vcpu;
diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c
index 5ba739fbed..5627772e7c 100644
--- a/target/i386/tcg/sysemu/excp_helper.c
+++ b/target/i386/tcg/sysemu/excp_helper.c
@@ -19,6 +19,7 @@
#include "qemu/osdep.h"
#include "cpu.h"
+#include "exec/exec-all.h"
#include "tcg/helper-tcg.h"
int get_pg_mode(CPUX86State *env)
diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c
index 9ccaa054c4..3715c1e262 100644
--- a/target/i386/tcg/sysemu/misc_helper.c
+++ b/target/i386/tcg/sysemu/misc_helper.c
@@ -23,6 +23,7 @@
#include "exec/helper-proto.h"
#include "exec/cpu_ldst.h"
#include "exec/address-spaces.h"
+#include "exec/exec-all.h"
#include "tcg/helper-tcg.h"
void helper_outb(CPUX86State *env, uint32_t port, uint32_t data)
diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c
index 6bc47c5309..1d30e4e2ed 100644
--- a/target/i386/whpx/whpx-accel-ops.c
+++ b/target/i386/whpx/whpx-accel-ops.c
@@ -83,12 +83,18 @@ static void whpx_kick_vcpu_thread(CPUState *cpu)
}
}
+static bool whpx_vcpu_thread_is_idle(CPUState *cpu)
+{
+ return !whpx_apic_in_platform();
+}
+
static void whpx_accel_ops_class_init(ObjectClass *oc, void *data)
{
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
ops->create_vcpu_thread = whpx_start_vcpu_thread;
ops->kick_vcpu_thread = whpx_kick_vcpu_thread;
+ ops->cpu_thread_is_idle = whpx_vcpu_thread_is_idle;
ops->synchronize_post_reset = whpx_cpu_synchronize_post_reset;
ops->synchronize_post_init = whpx_cpu_synchronize_post_init;
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index ef896da0a2..c7e25abf42 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -221,7 +221,7 @@ static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
static int whpx_set_tsc(CPUState *cpu)
{
- struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+ CPUX86State *env = cpu->env_ptr;
WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
WHV_REGISTER_VALUE tsc_val;
HRESULT hr;
@@ -260,7 +260,7 @@ static void whpx_set_registers(CPUState *cpu, int level)
{
struct whpx_state *whpx = &whpx_global;
struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
- struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+ CPUX86State *env = cpu->env_ptr;
X86CPU *x86_cpu = X86_CPU(cpu);
struct whpx_register_set vcxt;
HRESULT hr;
@@ -428,7 +428,7 @@ static void whpx_set_registers(CPUState *cpu, int level)
static int whpx_get_tsc(CPUState *cpu)
{
- struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+ CPUX86State *env = cpu->env_ptr;
WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
WHV_REGISTER_VALUE tsc_val;
HRESULT hr;
@@ -449,7 +449,7 @@ static void whpx_get_registers(CPUState *cpu)
{
struct whpx_state *whpx = &whpx_global;
struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
- struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+ CPUX86State *env = cpu->env_ptr;
X86CPU *x86_cpu = X86_CPU(cpu);
struct whpx_register_set vcxt;
uint64_t tpr, apic_base;
@@ -760,7 +760,7 @@ static int whpx_handle_portio(CPUState *cpu,
static int whpx_handle_halt(CPUState *cpu)
{
- struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+ CPUX86State *env = cpu->env_ptr;
int ret = 0;
qemu_mutex_lock_iothread();
@@ -781,7 +781,7 @@ static void whpx_vcpu_pre_run(CPUState *cpu)
HRESULT hr;
struct whpx_state *whpx = &whpx_global;
struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
- struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+ CPUX86State *env = cpu->env_ptr;
X86CPU *x86_cpu = X86_CPU(cpu);
int irq;
uint8_t tpr;
@@ -903,7 +903,7 @@ static void whpx_vcpu_pre_run(CPUState *cpu)
static void whpx_vcpu_post_run(CPUState *cpu)
{
struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
- struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+ CPUX86State *env = cpu->env_ptr;
X86CPU *x86_cpu = X86_CPU(cpu);
env->eflags = vcpu->exit_ctx.VpContext.Rflags;
@@ -927,7 +927,7 @@ static void whpx_vcpu_post_run(CPUState *cpu)
static void whpx_vcpu_process_async_events(CPUState *cpu)
{
- struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+ CPUX86State *env = cpu->env_ptr;
X86CPU *x86_cpu = X86_CPU(cpu);
struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
@@ -1333,7 +1333,7 @@ int whpx_init_vcpu(CPUState *cpu)
struct whpx_state *whpx = &whpx_global;
struct whpx_vcpu *vcpu = NULL;
Error *local_error = NULL;
- struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
+ CPUX86State *env = cpu->env_ptr;
X86CPU *x86_cpu = X86_CPU(cpu);
UINT64 freq = 0;
int ret;
diff --git a/target/m68k/cpu-qom.h b/target/m68k/cpu-qom.h
index 1ceb160ecb..cd9687192c 100644
--- a/target/m68k/cpu-qom.h
+++ b/target/m68k/cpu-qom.h
@@ -25,8 +25,7 @@
#define TYPE_M68K_CPU "m68k-cpu"
-OBJECT_DECLARE_TYPE(M68kCPU, M68kCPUClass,
- M68K_CPU)
+OBJECT_DECLARE_CPU_TYPE(M68kCPU, M68kCPUClass, M68K_CPU)
/*
* M68kCPUClass:
diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h
index a3423729ef..872e8ce637 100644
--- a/target/m68k/cpu.h
+++ b/target/m68k/cpu.h
@@ -79,7 +79,7 @@
typedef CPU_LDoubleU FPReg;
-typedef struct CPUM68KState {
+typedef struct CPUArchState {
uint32_t dregs[8];
uint32_t aregs[8];
uint32_t pc;
@@ -156,7 +156,7 @@ typedef struct CPUM68KState {
*
* A Motorola 68k CPU.
*/
-struct M68kCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -574,9 +574,6 @@ void m68k_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr,
int mmu_idx, MemTxAttrs attrs,
MemTxResult response, uintptr_t retaddr);
-typedef CPUM68KState CPUArchState;
-typedef M68kCPU ArchCPU;
-
#include "exec/cpu-all.h"
/* TB flags */
diff --git a/target/microblaze/cpu-qom.h b/target/microblaze/cpu-qom.h
index e520eefb12..255b39a45d 100644
--- a/target/microblaze/cpu-qom.h
+++ b/target/microblaze/cpu-qom.h
@@ -25,8 +25,7 @@
#define TYPE_MICROBLAZE_CPU "microblaze-cpu"
-OBJECT_DECLARE_TYPE(MicroBlazeCPU, MicroBlazeCPUClass,
- MICROBLAZE_CPU)
+OBJECT_DECLARE_CPU_TYPE(MicroBlazeCPU, MicroBlazeCPUClass, MICROBLAZE_CPU)
/**
* MicroBlazeCPUClass:
diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
index e9cd0b88de..0a0ce71b6a 100644
--- a/target/microblaze/cpu.h
+++ b/target/microblaze/cpu.h
@@ -24,7 +24,7 @@
#include "exec/cpu-defs.h"
#include "fpu/softfloat-types.h"
-typedef struct CPUMBState CPUMBState;
+typedef struct CPUArchState CPUMBState;
#if !defined(CONFIG_USER_ONLY)
#include "mmu.h"
#endif
@@ -239,7 +239,7 @@ typedef struct CPUMBState CPUMBState;
#define USE_NON_SECURE_M_AXI_DC_MASK 0x4
#define USE_NON_SECURE_M_AXI_IC_MASK 0x8
-struct CPUMBState {
+struct CPUArchState {
uint32_t bvalue; /* TCG temporary, only valid during a TB */
uint32_t btarget; /* Full resolved branch destination */
@@ -339,7 +339,7 @@ typedef struct {
*
* A MicroBlaze CPU.
*/
-struct MicroBlazeCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
@@ -394,9 +394,6 @@ void mb_tcg_init(void);
#define MMU_USER_IDX 2
/* See NB_MMU_MODES further up the file. */
-typedef CPUMBState CPUArchState;
-typedef MicroBlazeCPU ArchCPU;
-
#include "exec/cpu-all.h"
/* Ensure there is no overlap between the two masks. */
diff --git a/target/microblaze/mmu.h b/target/microblaze/mmu.h
index b6b4b9ad60..1068bd2d52 100644
--- a/target/microblaze/mmu.h
+++ b/target/microblaze/mmu.h
@@ -20,6 +20,8 @@
#ifndef TARGET_MICROBLAZE_MMU_H
#define TARGET_MICROBLAZE_MMU_H
+#include "cpu.h"
+
#define MMU_R_PID 0
#define MMU_R_ZPR 1
#define MMU_R_TLBX 2
diff --git a/target/mips/cpu-qom.h b/target/mips/cpu-qom.h
index dda0c911fa..e28b529607 100644
--- a/target/mips/cpu-qom.h
+++ b/target/mips/cpu-qom.h
@@ -29,8 +29,7 @@
#define TYPE_MIPS_CPU "mips-cpu"
#endif
-OBJECT_DECLARE_TYPE(MIPSCPU, MIPSCPUClass,
- MIPS_CPU)
+OBJECT_DECLARE_CPU_TYPE(MIPSCPU, MIPSCPUClass, MIPS_CPU)
/**
* MIPSCPUClass:
diff --git a/target/mips/cpu.h b/target/mips/cpu.h
index 56b1cbd091..09e98f64de 100644
--- a/target/mips/cpu.h
+++ b/target/mips/cpu.h
@@ -524,8 +524,7 @@ struct TCState {
};
struct MIPSITUState;
-typedef struct CPUMIPSState CPUMIPSState;
-struct CPUMIPSState {
+typedef struct CPUArchState {
TCState active_tc;
CPUMIPSFPUContext active_fpu;
@@ -1161,7 +1160,7 @@ struct CPUMIPSState {
QEMUTimer *timer; /* Internal timer */
target_ulong exception_base; /* ExceptionBase input to the core */
uint64_t cp0_count_ns; /* CP0_Count clock period (in nanoseconds) */
-};
+} CPUMIPSState;
/**
* MIPSCPU:
@@ -1172,7 +1171,7 @@ struct CPUMIPSState {
*
* A MIPS CPU.
*/
-struct MIPSCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -1218,9 +1217,6 @@ static inline int cpu_mmu_index(CPUMIPSState *env, bool ifetch)
return hflags_mmu_index(env->hflags);
}
-typedef CPUMIPSState CPUArchState;
-typedef MIPSCPU ArchCPU;
-
#include "exec/cpu-all.h"
/* Exceptions */
diff --git a/target/mips/internal.h b/target/mips/internal.h
index daddb05fd4..ac6e03e2f2 100644
--- a/target/mips/internal.h
+++ b/target/mips/internal.h
@@ -12,6 +12,7 @@
#ifdef CONFIG_TCG
#include "tcg/tcg-internal.h"
#endif
+#include "cpu.h"
/*
* MMU types, the first four entries have the same layout as the
@@ -133,14 +134,14 @@ struct r4k_tlb_t {
struct CPUMIPSTLBContext {
uint32_t nb_tlb;
uint32_t tlb_in_use;
- int (*map_address)(struct CPUMIPSState *env, hwaddr *physical, int *prot,
+ int (*map_address)(CPUMIPSState *env, hwaddr *physical, int *prot,
target_ulong address, MMUAccessType access_type);
- void (*helper_tlbwi)(struct CPUMIPSState *env);
- void (*helper_tlbwr)(struct CPUMIPSState *env);
- void (*helper_tlbp)(struct CPUMIPSState *env);
- void (*helper_tlbr)(struct CPUMIPSState *env);
- void (*helper_tlbinv)(struct CPUMIPSState *env);
- void (*helper_tlbinvf)(struct CPUMIPSState *env);
+ void (*helper_tlbwi)(CPUMIPSState *env);
+ void (*helper_tlbwr)(CPUMIPSState *env);
+ void (*helper_tlbp)(CPUMIPSState *env);
+ void (*helper_tlbr)(CPUMIPSState *env);
+ void (*helper_tlbinv)(CPUMIPSState *env);
+ void (*helper_tlbinvf)(CPUMIPSState *env);
union {
struct {
r4k_tlb_t tlb[MIPS_TLB_MAX];
diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c
index 4cade61e93..6975ae4bdb 100644
--- a/target/nios2/cpu.c
+++ b/target/nios2/cpu.c
@@ -73,12 +73,9 @@ static void nios2_cpu_set_irq(void *opaque, int irq, int level)
env->regs[CR_IPENDING] = deposit32(env->regs[CR_IPENDING], irq, 1, !!level);
- env->irq_pending = env->regs[CR_IPENDING] & env->regs[CR_IENABLE];
-
- if (env->irq_pending && (env->regs[CR_STATUS] & CR_STATUS_PIE)) {
- env->irq_pending = 0;
+ if (env->regs[CR_IPENDING]) {
cpu_interrupt(cs, CPU_INTERRUPT_HARD);
- } else if (!env->irq_pending) {
+ } else {
cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
}
}
@@ -134,7 +131,8 @@ static bool nios2_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
CPUNios2State *env = &cpu->env;
if ((interrupt_request & CPU_INTERRUPT_HARD) &&
- (env->regs[CR_STATUS] & CR_STATUS_PIE)) {
+ (env->regs[CR_STATUS] & CR_STATUS_PIE) &&
+ (env->regs[CR_IPENDING] & env->regs[CR_IENABLE])) {
cs->exception_index = EXCP_IRQ;
nios2_cpu_do_interrupt(cs);
return true;
diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h
index d2ba0c5bbd..ca0f3420cd 100644
--- a/target/nios2/cpu.h
+++ b/target/nios2/cpu.h
@@ -25,15 +25,14 @@
#include "hw/core/cpu.h"
#include "qom/object.h"
-typedef struct CPUNios2State CPUNios2State;
+typedef struct CPUArchState CPUNios2State;
#if !defined(CONFIG_USER_ONLY)
#include "mmu.h"
#endif
#define TYPE_NIOS2_CPU "nios2-cpu"
-OBJECT_DECLARE_TYPE(Nios2CPU, Nios2CPUClass,
- NIOS2_CPU)
+OBJECT_DECLARE_CPU_TYPE(Nios2CPU, Nios2CPUClass, NIOS2_CPU)
/**
* Nios2CPUClass:
@@ -155,12 +154,11 @@ struct Nios2CPUClass {
#define CPU_INTERRUPT_NMI CPU_INTERRUPT_TGT_EXT_3
-struct CPUNios2State {
+struct CPUArchState {
uint32_t regs[NUM_CORE_REGS];
#if !defined(CONFIG_USER_ONLY)
Nios2MMU mmu;
- uint32_t irq_pending;
#endif
int error_code;
};
@@ -171,7 +169,7 @@ struct CPUNios2State {
*
* A Nios2 CPU.
*/
-struct Nios2CPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
diff --git a/target/nios2/helper.h b/target/nios2/helper.h
index 6c8f0b5b35..a44ecfdf7a 100644
--- a/target/nios2/helper.h
+++ b/target/nios2/helper.h
@@ -21,7 +21,7 @@
DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32)
#if !defined(CONFIG_USER_ONLY)
-DEF_HELPER_2(mmu_read_debug, void, env, i32)
-DEF_HELPER_3(mmu_write, void, env, i32, i32)
-DEF_HELPER_1(check_interrupts, void, env)
+DEF_HELPER_2(mmu_write_tlbacc, void, env, i32)
+DEF_HELPER_2(mmu_write_tlbmisc, void, env, i32)
+DEF_HELPER_2(mmu_write_pteaddr, void, env, i32)
#endif
diff --git a/target/nios2/meson.build b/target/nios2/meson.build
index e643917db1..62b384702d 100644
--- a/target/nios2/meson.build
+++ b/target/nios2/meson.build
@@ -2,14 +2,13 @@ nios2_ss = ss.source_set()
nios2_ss.add(files(
'cpu.c',
'helper.c',
- 'mmu.c',
'nios2-semi.c',
'op_helper.c',
'translate.c',
))
nios2_softmmu_ss = ss.source_set()
-nios2_softmmu_ss.add(files('monitor.c'))
+nios2_softmmu_ss.add(files('monitor.c', 'mmu.c'))
target_arch += {'nios2': nios2_ss}
target_softmmu_arch += {'nios2': nios2_softmmu_ss}
diff --git a/target/nios2/mmu.c b/target/nios2/mmu.c
index 2545c06761..4daab2a7ab 100644
--- a/target/nios2/mmu.c
+++ b/target/nios2/mmu.c
@@ -23,37 +23,9 @@
#include "cpu.h"
#include "exec/exec-all.h"
#include "mmu.h"
+#include "exec/helper-proto.h"
+#include "trace/trace-target_nios2.h"
-#if !defined(CONFIG_USER_ONLY)
-
-/* Define this to enable MMU debug messages */
-/* #define DEBUG_MMU */
-
-#ifdef DEBUG_MMU
-#define MMU_LOG(x) x
-#else
-#define MMU_LOG(x)
-#endif
-
-void mmu_read_debug(CPUNios2State *env, uint32_t rn)
-{
- switch (rn) {
- case CR_TLBACC:
- MMU_LOG(qemu_log("TLBACC READ %08X\n", env->regs[rn]));
- break;
-
- case CR_TLBMISC:
- MMU_LOG(qemu_log("TLBMISC READ %08X\n", env->regs[rn]));
- break;
-
- case CR_PTEADDR:
- MMU_LOG(qemu_log("PTEADDR READ %08X\n", env->regs[rn]));
- break;
-
- default:
- break;
- }
-}
/* rw - 0 = read, 1 = write, 2 = fetch. */
unsigned int mmu_translate(CPUNios2State *env,
@@ -63,37 +35,26 @@ unsigned int mmu_translate(CPUNios2State *env,
Nios2CPU *cpu = env_archcpu(env);
int pid = (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >> 4;
int vpn = vaddr >> 12;
+ int way, n_ways = cpu->tlb_num_ways;
- MMU_LOG(qemu_log("mmu_translate vaddr %08X, pid %08X, vpn %08X\n",
- vaddr, pid, vpn));
-
- int way;
- for (way = 0; way < cpu->tlb_num_ways; way++) {
-
- Nios2TLBEntry *entry =
- &env->mmu.tlb[(way * cpu->tlb_num_ways) +
- (vpn & env->mmu.tlb_entry_mask)];
-
- MMU_LOG(qemu_log("TLB[%d] TAG %08X, VPN %08X\n",
- (way * cpu->tlb_num_ways) +
- (vpn & env->mmu.tlb_entry_mask),
- entry->tag, (entry->tag >> 12)));
+ for (way = 0; way < n_ways; way++) {
+ uint32_t index = (way * n_ways) + (vpn & env->mmu.tlb_entry_mask);
+ Nios2TLBEntry *entry = &env->mmu.tlb[index];
if (((entry->tag >> 12) != vpn) ||
(((entry->tag & (1 << 11)) == 0) &&
((entry->tag & ((1 << cpu->pid_num_bits) - 1)) != pid))) {
+ trace_nios2_mmu_translate_miss(vaddr, pid, index, entry->tag);
continue;
}
+
lu->vaddr = vaddr & TARGET_PAGE_MASK;
lu->paddr = (entry->data & CR_TLBACC_PFN_MASK) << TARGET_PAGE_BITS;
lu->prot = ((entry->data & CR_TLBACC_R) ? PAGE_READ : 0) |
((entry->data & CR_TLBACC_W) ? PAGE_WRITE : 0) |
((entry->data & CR_TLBACC_X) ? PAGE_EXEC : 0);
- MMU_LOG(qemu_log("HIT TLB[%d] %08X %08X %08X\n",
- (way * cpu->tlb_num_ways) +
- (vpn & env->mmu.tlb_entry_mask),
- lu->vaddr, lu->paddr, lu->prot));
+ trace_nios2_mmu_translate_hit(vaddr, pid, index, lu->paddr, lu->prot);
return 1;
}
return 0;
@@ -104,141 +65,119 @@ static void mmu_flush_pid(CPUNios2State *env, uint32_t pid)
CPUState *cs = env_cpu(env);
Nios2CPU *cpu = env_archcpu(env);
int idx;
- MMU_LOG(qemu_log("TLB Flush PID %d\n", pid));
for (idx = 0; idx < cpu->tlb_num_entries; idx++) {
Nios2TLBEntry *entry = &env->mmu.tlb[idx];
- MMU_LOG(qemu_log("TLB[%d] => %08X %08X\n",
- idx, entry->tag, entry->data));
-
if ((entry->tag & (1 << 10)) && (!(entry->tag & (1 << 11))) &&
((entry->tag & ((1 << cpu->pid_num_bits) - 1)) == pid)) {
uint32_t vaddr = entry->tag & TARGET_PAGE_MASK;
- MMU_LOG(qemu_log("TLB Flush Page %08X\n", vaddr));
-
+ trace_nios2_mmu_flush_pid_hit(pid, idx, vaddr);
tlb_flush_page(cs, vaddr);
+ } else {
+ trace_nios2_mmu_flush_pid_miss(pid, idx, entry->tag);
}
}
}
-void mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v)
+void helper_mmu_write_tlbacc(CPUNios2State *env, uint32_t v)
{
CPUState *cs = env_cpu(env);
Nios2CPU *cpu = env_archcpu(env);
- MMU_LOG(qemu_log("mmu_write %08X = %08X\n", rn, v));
-
- switch (rn) {
- case CR_TLBACC:
- MMU_LOG(qemu_log("TLBACC: IG %02X, FLAGS %c%c%c%c%c, PFN %05X\n",
- v >> CR_TLBACC_IGN_SHIFT,
- (v & CR_TLBACC_C) ? 'C' : '.',
- (v & CR_TLBACC_R) ? 'R' : '.',
- (v & CR_TLBACC_W) ? 'W' : '.',
- (v & CR_TLBACC_X) ? 'X' : '.',
- (v & CR_TLBACC_G) ? 'G' : '.',
- v & CR_TLBACC_PFN_MASK));
-
- /* if tlbmisc.WE == 1 then trigger a TLB write on writes to TLBACC */
- if (env->regs[CR_TLBMISC] & CR_TLBMISC_WR) {
- int way = (env->regs[CR_TLBMISC] >> CR_TLBMISC_WAY_SHIFT);
- int vpn = (env->mmu.pteaddr_wr & CR_PTEADDR_VPN_MASK) >> 2;
- int pid = (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >> 4;
- int g = (v & CR_TLBACC_G) ? 1 : 0;
- int valid = ((vpn & CR_TLBACC_PFN_MASK) < 0xC0000) ? 1 : 0;
- Nios2TLBEntry *entry =
- &env->mmu.tlb[(way * cpu->tlb_num_ways) +
- (vpn & env->mmu.tlb_entry_mask)];
- uint32_t newTag = (vpn << 12) | (g << 11) | (valid << 10) | pid;
- uint32_t newData = v & (CR_TLBACC_C | CR_TLBACC_R | CR_TLBACC_W |
- CR_TLBACC_X | CR_TLBACC_PFN_MASK);
-
- if ((entry->tag != newTag) || (entry->data != newData)) {
- if (entry->tag & (1 << 10)) {
- /* Flush existing entry */
- MMU_LOG(qemu_log("TLB Flush Page (OLD) %08X\n",
- entry->tag & TARGET_PAGE_MASK));
- tlb_flush_page(cs, entry->tag & TARGET_PAGE_MASK);
- }
- entry->tag = newTag;
- entry->data = newData;
- MMU_LOG(qemu_log("TLB[%d] = %08X %08X\n",
- (way * cpu->tlb_num_ways) +
- (vpn & env->mmu.tlb_entry_mask),
- entry->tag, entry->data));
+ trace_nios2_mmu_write_tlbacc(v >> CR_TLBACC_IGN_SHIFT,
+ (v & CR_TLBACC_C) ? 'C' : '.',
+ (v & CR_TLBACC_R) ? 'R' : '.',
+ (v & CR_TLBACC_W) ? 'W' : '.',
+ (v & CR_TLBACC_X) ? 'X' : '.',
+ (v & CR_TLBACC_G) ? 'G' : '.',
+ v & CR_TLBACC_PFN_MASK);
+
+ /* if tlbmisc.WE == 1 then trigger a TLB write on writes to TLBACC */
+ if (env->regs[CR_TLBMISC] & CR_TLBMISC_WR) {
+ int way = (env->regs[CR_TLBMISC] >> CR_TLBMISC_WAY_SHIFT);
+ int vpn = (env->mmu.pteaddr_wr & CR_PTEADDR_VPN_MASK) >> 2;
+ int pid = (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >> 4;
+ int g = (v & CR_TLBACC_G) ? 1 : 0;
+ int valid = ((vpn & CR_TLBACC_PFN_MASK) < 0xC0000) ? 1 : 0;
+ Nios2TLBEntry *entry =
+ &env->mmu.tlb[(way * cpu->tlb_num_ways) +
+ (vpn & env->mmu.tlb_entry_mask)];
+ uint32_t newTag = (vpn << 12) | (g << 11) | (valid << 10) | pid;
+ uint32_t newData = v & (CR_TLBACC_C | CR_TLBACC_R | CR_TLBACC_W |
+ CR_TLBACC_X | CR_TLBACC_PFN_MASK);
+
+ if ((entry->tag != newTag) || (entry->data != newData)) {
+ if (entry->tag & (1 << 10)) {
+ /* Flush existing entry */
+ tlb_flush_page(cs, entry->tag & TARGET_PAGE_MASK);
}
- /* Auto-increment tlbmisc.WAY */
- env->regs[CR_TLBMISC] =
- (env->regs[CR_TLBMISC] & ~CR_TLBMISC_WAY_MASK) |
- (((way + 1) & (cpu->tlb_num_ways - 1)) <<
- CR_TLBMISC_WAY_SHIFT);
+ entry->tag = newTag;
+ entry->data = newData;
}
+ /* Auto-increment tlbmisc.WAY */
+ env->regs[CR_TLBMISC] =
+ (env->regs[CR_TLBMISC] & ~CR_TLBMISC_WAY_MASK) |
+ (((way + 1) & (cpu->tlb_num_ways - 1)) <<
+ CR_TLBMISC_WAY_SHIFT);
+ }
- /* Writes to TLBACC don't change the read-back value */
- env->mmu.tlbacc_wr = v;
- break;
-
- case CR_TLBMISC:
- MMU_LOG(qemu_log("TLBMISC: WAY %X, FLAGS %c%c%c%c%c%c, PID %04X\n",
- v >> CR_TLBMISC_WAY_SHIFT,
- (v & CR_TLBMISC_RD) ? 'R' : '.',
- (v & CR_TLBMISC_WR) ? 'W' : '.',
- (v & CR_TLBMISC_DBL) ? '2' : '.',
- (v & CR_TLBMISC_BAD) ? 'B' : '.',
- (v & CR_TLBMISC_PERM) ? 'P' : '.',
- (v & CR_TLBMISC_D) ? 'D' : '.',
- (v & CR_TLBMISC_PID_MASK) >> 4));
+ /* Writes to TLBACC don't change the read-back value */
+ env->mmu.tlbacc_wr = v;
+}
- if ((v & CR_TLBMISC_PID_MASK) !=
- (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK)) {
- mmu_flush_pid(env, (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >>
- CR_TLBMISC_PID_SHIFT);
- }
- /* if tlbmisc.RD == 1 then trigger a TLB read on writes to TLBMISC */
- if (v & CR_TLBMISC_RD) {
- int way = (v >> CR_TLBMISC_WAY_SHIFT);
- int vpn = (env->mmu.pteaddr_wr & CR_PTEADDR_VPN_MASK) >> 2;
- Nios2TLBEntry *entry =
- &env->mmu.tlb[(way * cpu->tlb_num_ways) +
- (vpn & env->mmu.tlb_entry_mask)];
+void helper_mmu_write_tlbmisc(CPUNios2State *env, uint32_t v)
+{
+ Nios2CPU *cpu = env_archcpu(env);
- env->regs[CR_TLBACC] &= CR_TLBACC_IGN_MASK;
- env->regs[CR_TLBACC] |= entry->data;
- env->regs[CR_TLBACC] |= (entry->tag & (1 << 11)) ? CR_TLBACC_G : 0;
- env->regs[CR_TLBMISC] =
- (v & ~CR_TLBMISC_PID_MASK) |
- ((entry->tag & ((1 << cpu->pid_num_bits) - 1)) <<
- CR_TLBMISC_PID_SHIFT);
- env->regs[CR_PTEADDR] &= ~CR_PTEADDR_VPN_MASK;
- env->regs[CR_PTEADDR] |= (entry->tag >> 12) << CR_PTEADDR_VPN_SHIFT;
- MMU_LOG(qemu_log("TLB READ way %d, vpn %05X, tag %08X, data %08X, "
- "tlbacc %08X, tlbmisc %08X, pteaddr %08X\n",
- way, vpn, entry->tag, entry->data,
- env->regs[CR_TLBACC], env->regs[CR_TLBMISC],
- env->regs[CR_PTEADDR]));
- } else {
- env->regs[CR_TLBMISC] = v;
- }
+ trace_nios2_mmu_write_tlbmisc(v >> CR_TLBMISC_WAY_SHIFT,
+ (v & CR_TLBMISC_RD) ? 'R' : '.',
+ (v & CR_TLBMISC_WR) ? 'W' : '.',
+ (v & CR_TLBMISC_DBL) ? '2' : '.',
+ (v & CR_TLBMISC_BAD) ? 'B' : '.',
+ (v & CR_TLBMISC_PERM) ? 'P' : '.',
+ (v & CR_TLBMISC_D) ? 'D' : '.',
+ (v & CR_TLBMISC_PID_MASK) >> 4);
+
+ if ((v & CR_TLBMISC_PID_MASK) !=
+ (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK)) {
+ mmu_flush_pid(env, (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >>
+ CR_TLBMISC_PID_SHIFT);
+ }
+ /* if tlbmisc.RD == 1 then trigger a TLB read on writes to TLBMISC */
+ if (v & CR_TLBMISC_RD) {
+ int way = (v >> CR_TLBMISC_WAY_SHIFT);
+ int vpn = (env->mmu.pteaddr_wr & CR_PTEADDR_VPN_MASK) >> 2;
+ Nios2TLBEntry *entry =
+ &env->mmu.tlb[(way * cpu->tlb_num_ways) +
+ (vpn & env->mmu.tlb_entry_mask)];
- env->mmu.tlbmisc_wr = v;
- break;
+ env->regs[CR_TLBACC] &= CR_TLBACC_IGN_MASK;
+ env->regs[CR_TLBACC] |= entry->data;
+ env->regs[CR_TLBACC] |= (entry->tag & (1 << 11)) ? CR_TLBACC_G : 0;
+ env->regs[CR_TLBMISC] =
+ (v & ~CR_TLBMISC_PID_MASK) |
+ ((entry->tag & ((1 << cpu->pid_num_bits) - 1)) <<
+ CR_TLBMISC_PID_SHIFT);
+ env->regs[CR_PTEADDR] &= ~CR_PTEADDR_VPN_MASK;
+ env->regs[CR_PTEADDR] |= (entry->tag >> 12) << CR_PTEADDR_VPN_SHIFT;
+ } else {
+ env->regs[CR_TLBMISC] = v;
+ }
- case CR_PTEADDR:
- MMU_LOG(qemu_log("PTEADDR: PTBASE %03X, VPN %05X\n",
- v >> CR_PTEADDR_PTBASE_SHIFT,
- (v & CR_PTEADDR_VPN_MASK) >> CR_PTEADDR_VPN_SHIFT));
+ env->mmu.tlbmisc_wr = v;
+}
- /* Writes to PTEADDR don't change the read-back VPN value */
- env->regs[CR_PTEADDR] = (v & ~CR_PTEADDR_VPN_MASK) |
- (env->regs[CR_PTEADDR] & CR_PTEADDR_VPN_MASK);
- env->mmu.pteaddr_wr = v;
- break;
+void helper_mmu_write_pteaddr(CPUNios2State *env, uint32_t v)
+{
+ trace_nios2_mmu_write_pteaddr(v >> CR_PTEADDR_PTBASE_SHIFT,
+ (v & CR_PTEADDR_VPN_MASK) >> CR_PTEADDR_VPN_SHIFT);
- default:
- break;
- }
+ /* Writes to PTEADDR don't change the read-back VPN value */
+ env->regs[CR_PTEADDR] = (v & ~CR_PTEADDR_VPN_MASK) |
+ (env->regs[CR_PTEADDR] & CR_PTEADDR_VPN_MASK);
+ env->mmu.pteaddr_wr = v;
}
void mmu_init(CPUNios2State *env)
@@ -246,8 +185,6 @@ void mmu_init(CPUNios2State *env)
Nios2CPU *cpu = env_archcpu(env);
Nios2MMU *mmu = &env->mmu;
- MMU_LOG(qemu_log("mmu_init\n"));
-
mmu->tlb_entry_mask = (cpu->tlb_num_entries / cpu->tlb_num_ways) - 1;
mmu->tlb = g_new0(Nios2TLBEntry, cpu->tlb_num_entries);
}
@@ -277,5 +214,3 @@ void dump_mmu(CPUNios2State *env)
(entry->data & CR_TLBACC_X) ? 'X' : '-');
}
}
-
-#endif /* !CONFIG_USER_ONLY */
diff --git a/target/nios2/mmu.h b/target/nios2/mmu.h
index 4f46fbb82e..5b085900fb 100644
--- a/target/nios2/mmu.h
+++ b/target/nios2/mmu.h
@@ -21,6 +21,8 @@
#ifndef NIOS2_MMU_H
#define NIOS2_MMU_H
+#include "cpu.h"
+
typedef struct Nios2TLBEntry {
target_ulong tag;
target_ulong data;
@@ -44,7 +46,5 @@ void mmu_flip_um(CPUNios2State *env, unsigned int um);
unsigned int mmu_translate(CPUNios2State *env,
Nios2MMULookup *lu,
target_ulong vaddr, int rw, int mmu_idx);
-void mmu_read_debug(CPUNios2State *env, uint32_t rn);
-void mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v);
void mmu_init(CPUNios2State *env);
diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c
index a59003855a..caa885f7b4 100644
--- a/target/nios2/op_helper.c
+++ b/target/nios2/op_helper.c
@@ -21,38 +21,9 @@
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
#include "exec/exec-all.h"
#include "qemu/main-loop.h"
-#if !defined(CONFIG_USER_ONLY)
-void helper_mmu_read_debug(CPUNios2State *env, uint32_t rn)
-{
- mmu_read_debug(env, rn);
-}
-
-void helper_mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v)
-{
- mmu_write(env, rn, v);
-}
-
-static void nios2_check_interrupts(CPUNios2State *env)
-{
- if (env->irq_pending &&
- (env->regs[CR_STATUS] & CR_STATUS_PIE)) {
- env->irq_pending = 0;
- cpu_interrupt(env_cpu(env), CPU_INTERRUPT_HARD);
- }
-}
-
-void helper_check_interrupts(CPUNios2State *env)
-{
- qemu_mutex_lock_iothread();
- nios2_check_interrupts(env);
- qemu_mutex_unlock_iothread();
-}
-#endif /* !CONFIG_USER_ONLY */
-
void helper_raise_exception(CPUNios2State *env, uint32_t index)
{
CPUState *cs = env_cpu(env);
diff --git a/target/nios2/trace-events b/target/nios2/trace-events
new file mode 100644
index 0000000000..07f1f0a5e7
--- /dev/null
+++ b/target/nios2/trace-events
@@ -0,0 +1,10 @@
+# mmu.c
+nios2_mmu_translate_miss(uint32_t vaddr, uint32_t pid, uint32_t index, uint32_t tag) "mmu_translate: MISS vaddr=0x%08x pid=%u TLB[%u] tag=0x%08x"
+nios2_mmu_translate_hit(uint32_t vaddr, uint32_t pid, uint32_t index, uint32_t paddr, uint32_t prot) "mmu_translate: HIT vaddr=0x%08x pid=%u TLB[%u] paddr=0x%08x prot=0x%x"
+
+nios2_mmu_flush_pid_miss(uint32_t pid, uint32_t index, uint32_t tag) "mmu_flush: MISS pid=%u TLB[%u] tag=0x%08x"
+nios2_mmu_flush_pid_hit(uint32_t pid, uint32_t index, uint32_t vaddr) "mmu_flush: HIT pid=%u TLB[%u] vaddr=0x%08x"
+
+nios2_mmu_write_tlbacc(uint32_t ig, char c, char r, char w, char x, char g, uint32_t pfn) "mmu_write_tlbacc: ig=0x%02x flags=%c%c%c%c%c pfn=0x%08x"
+nios2_mmu_write_tlbmisc(uint32_t way, char r, char w, char t, char b, char p, char d, uint32_t pid) "mmu_write_tlbmisc: way=0x%x flags=%c%c%c%c%c%c pid=%u"
+nios2_mmu_write_pteaddr(uint32_t ptb, uint32_t vpn) "mmu_write_pteaddr: ptbase=0x%03x vpn=0x%05x"
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index f9abc2fdd2..f89271dbed 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -447,28 +447,24 @@ static void rdctl(DisasContext *dc, uint32_t code, uint32_t flags)
gen_check_supervisor(dc);
- switch (instr.imm5 + CR_BASE) {
- case CR_PTEADDR:
- case CR_TLBACC:
- case CR_TLBMISC:
- {
-#if !defined(CONFIG_USER_ONLY)
- if (likely(instr.c != R_ZERO)) {
- tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]);
-#ifdef DEBUG_MMU
- TCGv_i32 tmp = tcg_const_i32(instr.imm5 + CR_BASE);
- gen_helper_mmu_read_debug(cpu_R[instr.c], cpu_env, tmp);
- tcg_temp_free_i32(tmp);
-#endif
- }
-#endif
- break;
+ if (unlikely(instr.c == R_ZERO)) {
+ return;
}
+ switch (instr.imm5 + CR_BASE) {
+ case CR_IPENDING:
+ /*
+ * The value of the ipending register is synthetic.
+ * In hw, this is the AND of a set of hardware irq lines
+ * with the ienable register. In qemu, we re-use the space
+ * of CR_IPENDING to store the set of irq lines, and so we
+ * must perform the AND here, and anywhere else we need the
+ * guest value of ipending.
+ */
+ tcg_gen_and_tl(cpu_R[instr.c], cpu_R[CR_IPENDING], cpu_R[CR_IENABLE]);
+ break;
default:
- if (likely(instr.c != R_ZERO)) {
- tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]);
- }
+ tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]);
break;
}
}
@@ -476,36 +472,33 @@ static void rdctl(DisasContext *dc, uint32_t code, uint32_t flags)
/* ctlN <- rA */
static void wrctl(DisasContext *dc, uint32_t code, uint32_t flags)
{
- R_TYPE(instr, code);
-
gen_check_supervisor(dc);
+#ifndef CONFIG_USER_ONLY
+ R_TYPE(instr, code);
+ TCGv v = load_gpr(dc, instr.a);
+
switch (instr.imm5 + CR_BASE) {
case CR_PTEADDR:
+ gen_helper_mmu_write_pteaddr(cpu_env, v);
+ break;
case CR_TLBACC:
+ gen_helper_mmu_write_tlbacc(cpu_env, v);
+ break;
case CR_TLBMISC:
- {
-#if !defined(CONFIG_USER_ONLY)
- TCGv_i32 tmp = tcg_const_i32(instr.imm5 + CR_BASE);
- gen_helper_mmu_write(cpu_env, tmp, load_gpr(dc, instr.a));
- tcg_temp_free_i32(tmp);
-#endif
+ gen_helper_mmu_write_tlbmisc(cpu_env, v);
break;
- }
-
- default:
- tcg_gen_mov_tl(cpu_R[instr.imm5 + CR_BASE], load_gpr(dc, instr.a));
+ case CR_IPENDING:
+ /* ipending is read only, writes ignored. */
break;
- }
-
- /* If interrupts were enabled using WRCTL, trigger them. */
-#if !defined(CONFIG_USER_ONLY)
- if ((instr.imm5 + CR_BASE) == CR_STATUS) {
- if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) {
- gen_io_start();
- }
- gen_helper_check_interrupts(cpu_env);
+ case CR_STATUS:
+ case CR_IENABLE:
+ /* If interrupts were enabled using WRCTL, trigger them. */
dc->base.is_jmp = DISAS_UPDATE;
+ /* fall through */
+ default:
+ tcg_gen_mov_tl(cpu_R[instr.imm5 + CR_BASE], v);
+ break;
}
#endif
}
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index ee069b080c..bdf29d2dc4 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -24,13 +24,9 @@
#include "hw/core/cpu.h"
#include "qom/object.h"
-/* cpu_openrisc_map_address_* in CPUOpenRISCTLBContext need this decl. */
-struct OpenRISCCPU;
-
#define TYPE_OPENRISC_CPU "or1k-cpu"
-OBJECT_DECLARE_TYPE(OpenRISCCPU, OpenRISCCPUClass,
- OPENRISC_CPU)
+OBJECT_DECLARE_CPU_TYPE(OpenRISCCPU, OpenRISCCPUClass, OPENRISC_CPU)
/**
* OpenRISCCPUClass:
@@ -231,18 +227,18 @@ typedef struct CPUOpenRISCTLBContext {
OpenRISCTLBEntry itlb[TLB_SIZE];
OpenRISCTLBEntry dtlb[TLB_SIZE];
- int (*cpu_openrisc_map_address_code)(struct OpenRISCCPU *cpu,
+ int (*cpu_openrisc_map_address_code)(OpenRISCCPU *cpu,
hwaddr *physical,
int *prot,
target_ulong address, int rw);
- int (*cpu_openrisc_map_address_data)(struct OpenRISCCPU *cpu,
+ int (*cpu_openrisc_map_address_data)(OpenRISCCPU *cpu,
hwaddr *physical,
int *prot,
target_ulong address, int rw);
} CPUOpenRISCTLBContext;
#endif
-typedef struct CPUOpenRISCState {
+typedef struct CPUArchState {
target_ulong shadow_gpr[16][32]; /* Shadow registers */
target_ulong pc; /* Program counter */
@@ -301,7 +297,7 @@ typedef struct CPUOpenRISCState {
*
* A OpenRISC CPU.
*/
-struct OpenRISCCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -348,9 +344,6 @@ void cpu_openrisc_count_stop(OpenRISCCPU *cpu);
#define OPENRISC_CPU_TYPE_NAME(model) model OPENRISC_CPU_TYPE_SUFFIX
#define CPU_RESOLVING_TYPE TYPE_OPENRISC_CPU
-typedef CPUOpenRISCState CPUArchState;
-typedef OpenRISCCPU ArchCPU;
-
#include "exec/cpu-all.h"
#define TB_FLAGS_SM SR_SM
diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h
index 98facee9fa..ad7e3c3db9 100644
--- a/target/ppc/cpu-qom.h
+++ b/target/ppc/cpu-qom.h
@@ -29,10 +29,9 @@
#define TYPE_POWERPC_CPU "powerpc-cpu"
#endif
-OBJECT_DECLARE_TYPE(PowerPCCPU, PowerPCCPUClass,
- POWERPC_CPU)
+OBJECT_DECLARE_CPU_TYPE(PowerPCCPU, PowerPCCPUClass, POWERPC_CPU)
-typedef struct CPUPPCState CPUPPCState;
+typedef struct CPUArchState CPUPPCState;
typedef struct ppc_tb_t ppc_tb_t;
typedef struct ppc_dcr_t ppc_dcr_t;
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 1b687521c7..047b24ba50 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1077,7 +1077,7 @@ struct ppc_radix_page_info {
#define PPC_CPU_OPCODES_LEN 0x40
#define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
-struct CPUPPCState {
+struct CPUArchState {
/* Most commonly used resources during translated code execution first */
target_ulong gpr[32]; /* general purpose registers */
target_ulong gprh[32]; /* storage for GPR MSB, used by the SPE extension */
@@ -1275,7 +1275,7 @@ typedef struct PPCVirtualHypervisorClass PPCVirtualHypervisorClass;
*
* A PowerPC CPU.
*/
-struct PowerPCCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -1477,9 +1477,6 @@ void ppc_compat_add_property(Object *obj, const char *name,
uint32_t *compat_pvr, const char *basedesc);
#endif /* defined(TARGET_PPC64) */
-typedef CPUPPCState CPUArchState;
-typedef PowerPCCPU ArchCPU;
-
#include "exec/cpu-all.h"
/*****************************************************************************/
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 8f970288f5..bd12db960a 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -2156,9 +2156,8 @@ VSX_TSQRT(xvtsqrtsp, 4, float32, VsrW(i), -126, 23)
* maddflgs - flags for the float*muladd routine that control the
* various forms (madd, msub, nmadd, nmsub)
* sfprf - set FPRF
- * r2sp - round intermediate double precision result to single precision
*/
-#define VSX_MADD(op, nels, tp, fld, maddflgs, sfprf, r2sp) \
+#define VSX_MADD(op, nels, tp, fld, maddflgs, sfprf) \
void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \
ppc_vsr_t *s1, ppc_vsr_t *s2, ppc_vsr_t *s3) \
{ \
@@ -2170,20 +2169,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \
for (i = 0; i < nels; i++) { \
float_status tstat = env->fp_status; \
set_float_exception_flags(0, &tstat); \
- if (r2sp && (tstat.float_rounding_mode == float_round_nearest_even)) {\
- /* \
- * Avoid double rounding errors by rounding the intermediate \
- * result to odd. \
- */ \
- set_float_rounding_mode(float_round_to_zero, &tstat); \
- t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld, \
- maddflgs, &tstat); \
- t.fld |= (get_float_exception_flags(&tstat) & \
- float_flag_inexact) != 0; \
- } else { \
- t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld, \
- maddflgs, &tstat); \
- } \
+ t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld, maddflgs, &tstat); \
env->fp_status.float_exception_flags |= tstat.float_exception_flags; \
\
if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \
@@ -2191,10 +2177,6 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \
sfprf, GETPC()); \
} \
\
- if (r2sp) { \
- t.fld = do_frsp(env, t.fld, GETPC()); \
- } \
- \
if (sfprf) { \
helper_compute_fprf_float64(env, t.fld); \
} \
@@ -2203,24 +2185,24 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \
do_float_check_status(env, GETPC()); \
}
-VSX_MADD(XSMADDDP, 1, float64, VsrD(0), MADD_FLGS, 1, 0)
-VSX_MADD(XSMSUBDP, 1, float64, VsrD(0), MSUB_FLGS, 1, 0)
-VSX_MADD(XSNMADDDP, 1, float64, VsrD(0), NMADD_FLGS, 1, 0)
-VSX_MADD(XSNMSUBDP, 1, float64, VsrD(0), NMSUB_FLGS, 1, 0)
-VSX_MADD(XSMADDSP, 1, float64, VsrD(0), MADD_FLGS, 1, 1)
-VSX_MADD(XSMSUBSP, 1, float64, VsrD(0), MSUB_FLGS, 1, 1)
-VSX_MADD(XSNMADDSP, 1, float64, VsrD(0), NMADD_FLGS, 1, 1)
-VSX_MADD(XSNMSUBSP, 1, float64, VsrD(0), NMSUB_FLGS, 1, 1)
+VSX_MADD(XSMADDDP, 1, float64, VsrD(0), MADD_FLGS, 1)
+VSX_MADD(XSMSUBDP, 1, float64, VsrD(0), MSUB_FLGS, 1)
+VSX_MADD(XSNMADDDP, 1, float64, VsrD(0), NMADD_FLGS, 1)
+VSX_MADD(XSNMSUBDP, 1, float64, VsrD(0), NMSUB_FLGS, 1)
+VSX_MADD(XSMADDSP, 1, float64r32, VsrD(0), MADD_FLGS, 1)
+VSX_MADD(XSMSUBSP, 1, float64r32, VsrD(0), MSUB_FLGS, 1)
+VSX_MADD(XSNMADDSP, 1, float64r32, VsrD(0), NMADD_FLGS, 1)
+VSX_MADD(XSNMSUBSP, 1, float64r32, VsrD(0), NMSUB_FLGS, 1)
-VSX_MADD(xvmadddp, 2, float64, VsrD(i), MADD_FLGS, 0, 0)
-VSX_MADD(xvmsubdp, 2, float64, VsrD(i), MSUB_FLGS, 0, 0)
-VSX_MADD(xvnmadddp, 2, float64, VsrD(i), NMADD_FLGS, 0, 0)
-VSX_MADD(xvnmsubdp, 2, float64, VsrD(i), NMSUB_FLGS, 0, 0)
+VSX_MADD(xvmadddp, 2, float64, VsrD(i), MADD_FLGS, 0)
+VSX_MADD(xvmsubdp, 2, float64, VsrD(i), MSUB_FLGS, 0)
+VSX_MADD(xvnmadddp, 2, float64, VsrD(i), NMADD_FLGS, 0)
+VSX_MADD(xvnmsubdp, 2, float64, VsrD(i), NMSUB_FLGS, 0)
-VSX_MADD(xvmaddsp, 4, float32, VsrW(i), MADD_FLGS, 0, 0)
-VSX_MADD(xvmsubsp, 4, float32, VsrW(i), MSUB_FLGS, 0, 0)
-VSX_MADD(xvnmaddsp, 4, float32, VsrW(i), NMADD_FLGS, 0, 0)
-VSX_MADD(xvnmsubsp, 4, float32, VsrW(i), NMSUB_FLGS, 0, 0)
+VSX_MADD(xvmaddsp, 4, float32, VsrW(i), MADD_FLGS, 0)
+VSX_MADD(xvmsubsp, 4, float32, VsrW(i), MSUB_FLGS, 0)
+VSX_MADD(xvnmaddsp, 4, float32, VsrW(i), NMADD_FLGS, 0)
+VSX_MADD(xvnmsubsp, 4, float32, VsrW(i), NMSUB_FLGS, 0)
/*
* VSX_MADDQ - VSX floating point quad-precision muliply/add
@@ -2540,6 +2522,8 @@ void helper_##name(CPUPPCState *env, \
ppc_vsr_t t = { }; \
bool first; \
\
+ helper_reset_fpstatus(env); \
+ \
if (max) { \
first = tp##_le_quiet(xb->fld, xa->fld, &env->fp_status); \
} else { \
@@ -2790,6 +2774,8 @@ void helper_XVCVSPBF16(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb)
ppc_vsr_t t = { };
int i, status;
+ helper_reset_fpstatus(env);
+
for (i = 0; i < 4; i++) {
t.VsrH(2 * i + 1) = float32_to_bfloat16(xb->VsrW(i), &env->fp_status);
}
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index b2b17bb1ca..492f34c499 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1072,7 +1072,7 @@ void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
*r = result;
}
-#define XXGENPCV(NAME, SZ) \
+#define XXGENPCV_BE_EXP(NAME, SZ) \
void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
{ \
ppc_vsr_t tmp; \
@@ -1093,8 +1093,9 @@ void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
} \
\
*t = tmp; \
-} \
- \
+}
+
+#define XXGENPCV_BE_COMP(NAME, SZ) \
void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
{ \
ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
@@ -1111,8 +1112,9 @@ void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
} \
\
*t = tmp; \
-} \
- \
+}
+
+#define XXGENPCV_LE_EXP(NAME, SZ) \
void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
{ \
ppc_vsr_t tmp; \
@@ -1135,8 +1137,9 @@ void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
} \
\
*t = tmp; \
-} \
- \
+}
+
+#define XXGENPCV_LE_COMP(NAME, SZ) \
void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
{ \
ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \
@@ -1157,10 +1160,21 @@ void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
*t = tmp; \
}
+#define XXGENPCV(NAME, SZ) \
+ XXGENPCV_BE_EXP(NAME, SZ) \
+ XXGENPCV_BE_COMP(NAME, SZ) \
+ XXGENPCV_LE_EXP(NAME, SZ) \
+ XXGENPCV_LE_COMP(NAME, SZ) \
+
XXGENPCV(XXGENPCVBM, 1)
XXGENPCV(XXGENPCVHM, 2)
XXGENPCV(XXGENPCVWM, 4)
XXGENPCV(XXGENPCVDM, 8)
+
+#undef XXGENPCV_BE_EXP
+#undef XXGENPCV_BE_COMP
+#undef XXGENPCV_LE_EXP
+#undef XXGENPCV_LE_COMP
#undef XXGENPCV
#if defined(HOST_WORDS_BIGENDIAN)
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index f91bee839d..6101bca3fd 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -1088,10 +1088,8 @@ static void do_vrlq_mask(TCGv_i64 mh, TCGv_i64 ml, TCGv_i64 b, TCGv_i64 e)
tcg_gen_or_i64(tl, t1, tl);
/* t = t >> 1 */
- tcg_gen_shli_i64(t0, th, 63);
- tcg_gen_shri_i64(tl, tl, 1);
+ tcg_gen_extract2_i64(tl, tl, th, 1);
tcg_gen_shri_i64(th, th, 1);
- tcg_gen_or_i64(tl, t0, tl);
/* m = m ^ t */
tcg_gen_xor_i64(mh, mh, th);
@@ -1148,10 +1146,8 @@ static bool do_vector_rotl_quad(DisasContext *ctx, arg_VX *a, bool mask,
tcg_gen_or_i64(t1, ah, t1);
if (mask || insert) {
- tcg_gen_shri_i64(n, vrb, 8);
- tcg_gen_shri_i64(vrb, vrb, 16);
- tcg_gen_andi_i64(n, n, 0x7f);
- tcg_gen_andi_i64(vrb, vrb, 0x7f);
+ tcg_gen_extract_i64(n, vrb, 8, 7);
+ tcg_gen_extract_i64(vrb, vrb, 16, 7);
do_vrlq_mask(ah, al, vrb, n);
@@ -1161,10 +1157,8 @@ static bool do_vector_rotl_quad(DisasContext *ctx, arg_VX *a, bool mask,
if (insert) {
get_avr64(n, a->vrt, true);
get_avr64(vrb, a->vrt, false);
- tcg_gen_not_i64(ah, ah);
- tcg_gen_not_i64(al, al);
- tcg_gen_and_i64(n, n, ah);
- tcg_gen_and_i64(vrb, vrb, al);
+ tcg_gen_andc_i64(n, n, ah);
+ tcg_gen_andc_i64(vrb, vrb, al);
tcg_gen_or_i64(t0, t0, n);
tcg_gen_or_i64(t1, t1, vrb);
}
@@ -3141,14 +3135,14 @@ static bool trans_VMULLD(DisasContext *ctx, arg_VX *a)
return true;
}
-TRANS_FLAGS2(ALTIVEC_207, VMULESB, do_vx_helper, gen_helper_VMULESB)
-TRANS_FLAGS2(ALTIVEC_207, VMULOSB, do_vx_helper, gen_helper_VMULOSB)
-TRANS_FLAGS2(ALTIVEC_207, VMULEUB, do_vx_helper, gen_helper_VMULEUB)
-TRANS_FLAGS2(ALTIVEC_207, VMULOUB, do_vx_helper, gen_helper_VMULOUB)
-TRANS_FLAGS2(ALTIVEC_207, VMULESH, do_vx_helper, gen_helper_VMULESH)
-TRANS_FLAGS2(ALTIVEC_207, VMULOSH, do_vx_helper, gen_helper_VMULOSH)
-TRANS_FLAGS2(ALTIVEC_207, VMULEUH, do_vx_helper, gen_helper_VMULEUH)
-TRANS_FLAGS2(ALTIVEC_207, VMULOUH, do_vx_helper, gen_helper_VMULOUH)
+TRANS_FLAGS(ALTIVEC, VMULESB, do_vx_helper, gen_helper_VMULESB)
+TRANS_FLAGS(ALTIVEC, VMULOSB, do_vx_helper, gen_helper_VMULOSB)
+TRANS_FLAGS(ALTIVEC, VMULEUB, do_vx_helper, gen_helper_VMULEUB)
+TRANS_FLAGS(ALTIVEC, VMULOUB, do_vx_helper, gen_helper_VMULOUB)
+TRANS_FLAGS(ALTIVEC, VMULESH, do_vx_helper, gen_helper_VMULESH)
+TRANS_FLAGS(ALTIVEC, VMULOSH, do_vx_helper, gen_helper_VMULOSH)
+TRANS_FLAGS(ALTIVEC, VMULEUH, do_vx_helper, gen_helper_VMULEUH)
+TRANS_FLAGS(ALTIVEC, VMULOUH, do_vx_helper, gen_helper_VMULOUH)
TRANS_FLAGS2(ALTIVEC_207, VMULESW, do_vx_helper, gen_helper_VMULESW)
TRANS_FLAGS2(ALTIVEC_207, VMULOSW, do_vx_helper, gen_helper_VMULOSW)
TRANS_FLAGS2(ALTIVEC_207, VMULEUW, do_vx_helper, gen_helper_VMULEUW)
@@ -3162,19 +3156,16 @@ static void do_vx_vmulhw_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, bool sign)
{
TCGv_i64 hh, lh, temp;
- uint64_t c;
hh = tcg_temp_new_i64();
lh = tcg_temp_new_i64();
temp = tcg_temp_new_i64();
- c = 0xFFFFFFFF;
-
if (sign) {
tcg_gen_ext32s_i64(lh, a);
tcg_gen_ext32s_i64(temp, b);
} else {
- tcg_gen_andi_i64(lh, a, c);
- tcg_gen_andi_i64(temp, b, c);
+ tcg_gen_ext32u_i64(lh, a);
+ tcg_gen_ext32u_i64(temp, b);
}
tcg_gen_mul_i64(lh, lh, temp);
@@ -3188,8 +3179,7 @@ static void do_vx_vmulhw_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, bool sign)
tcg_gen_mul_i64(hh, hh, temp);
tcg_gen_shri_i64(lh, lh, 32);
- tcg_gen_andi_i64(hh, hh, c << 32);
- tcg_gen_or_i64(t, hh, lh);
+ tcg_gen_deposit_i64(t, hh, lh, 0, 32);
tcg_temp_free_i64(hh);
tcg_temp_free_i64(lh);
diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc
index 2ffeab5287..48a97b2d7e 100644
--- a/target/ppc/translate/vsx-impl.c.inc
+++ b/target/ppc/translate/vsx-impl.c.inc
@@ -1204,43 +1204,44 @@ static bool trans_XXPERMX(DisasContext *ctx, arg_8RR_XX4_uim3 *a)
return true;
}
-#define XXGENPCV(NAME) \
-static bool trans_##NAME(DisasContext *ctx, arg_X_imm5 *a) \
-{ \
- TCGv_ptr xt, vrb; \
- \
- REQUIRE_INSNS_FLAGS2(ctx, ISA310); \
- REQUIRE_VSX(ctx); \
- \
- if (a->imm & ~0x3) { \
- gen_invalid(ctx); \
- return true; \
- } \
- \
- xt = gen_vsr_ptr(a->xt); \
- vrb = gen_avr_ptr(a->vrb); \
- \
- switch (a->imm) { \
- case 0b00000: /* Big-Endian expansion */ \
- glue(gen_helper_, glue(NAME, _be_exp))(xt, vrb); \
- break; \
- case 0b00001: /* Big-Endian compression */ \
- glue(gen_helper_, glue(NAME, _be_comp))(xt, vrb); \
- break; \
- case 0b00010: /* Little-Endian expansion */ \
- glue(gen_helper_, glue(NAME, _le_exp))(xt, vrb); \
- break; \
- case 0b00011: /* Little-Endian compression */ \
- glue(gen_helper_, glue(NAME, _le_comp))(xt, vrb); \
- break; \
- } \
- \
- tcg_temp_free_ptr(xt); \
- tcg_temp_free_ptr(vrb); \
- \
- return true; \
+typedef void (*xxgenpcv_genfn)(TCGv_ptr, TCGv_ptr);
+
+static bool do_xxgenpcv(DisasContext *ctx, arg_X_imm5 *a,
+ const xxgenpcv_genfn fn[4])
+{
+ TCGv_ptr xt, vrb;
+
+ REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+ REQUIRE_VSX(ctx);
+
+ if (a->imm & ~0x3) {
+ gen_invalid(ctx);
+ return true;
+ }
+
+ xt = gen_vsr_ptr(a->xt);
+ vrb = gen_avr_ptr(a->vrb);
+
+ fn[a->imm](xt, vrb);
+
+ tcg_temp_free_ptr(xt);
+ tcg_temp_free_ptr(vrb);
+
+ return true;
}
+#define XXGENPCV(NAME) \
+ static bool trans_##NAME(DisasContext *ctx, arg_X_imm5 *a) \
+ { \
+ static const xxgenpcv_genfn fn[4] = { \
+ gen_helper_##NAME##_be_exp, \
+ gen_helper_##NAME##_be_comp, \
+ gen_helper_##NAME##_le_exp, \
+ gen_helper_##NAME##_le_comp, \
+ }; \
+ return do_xxgenpcv(ctx, a, fn); \
+ }
+
XXGENPCV(XXGENPCVBM)
XXGENPCV(XXGENPCVHM)
XXGENPCV(XXGENPCVWM)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index b0a40b83e7..ddda4906ff 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -587,6 +587,11 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
cpu->cfg.ext_d = true;
}
+ if (cpu->cfg.ext_zdinx || cpu->cfg.ext_zhinx ||
+ cpu->cfg.ext_zhinxmin) {
+ cpu->cfg.ext_zfinx = true;
+ }
+
/* Set the ISA extensions, checks should have happened above */
if (cpu->cfg.ext_i) {
ext |= RVI;
@@ -665,6 +670,13 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
if (cpu->cfg.ext_j) {
ext |= RVJ;
}
+ if (cpu->cfg.ext_zfinx && ((ext & (RVF | RVD)) || cpu->cfg.ext_zfh ||
+ cpu->cfg.ext_zfhmin)) {
+ error_setg(errp,
+ "'Zfinx' cannot be supported together with 'F', 'D', 'Zfh',"
+ " 'Zfhmin'");
+ return;
+ }
set_misa(env, env->misa_mxl, ext);
}
@@ -783,6 +795,11 @@ static Property riscv_cpu_properties[] = {
DEFINE_PROP_BOOL("zbc", RISCVCPU, cfg.ext_zbc, true),
DEFINE_PROP_BOOL("zbs", RISCVCPU, cfg.ext_zbs, true),
+ DEFINE_PROP_BOOL("zdinx", RISCVCPU, cfg.ext_zdinx, false),
+ DEFINE_PROP_BOOL("zfinx", RISCVCPU, cfg.ext_zfinx, false),
+ DEFINE_PROP_BOOL("zhinx", RISCVCPU, cfg.ext_zhinx, false),
+ DEFINE_PROP_BOOL("zhinxmin", RISCVCPU, cfg.ext_zhinxmin, false),
+
/* Vendor-specific custom extensions */
DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 8183fb86d5..c069fe85fa 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -98,7 +98,7 @@ enum {
#define MAX_RISCV_PMPS (16)
-typedef struct CPURISCVState CPURISCVState;
+typedef struct CPUArchState CPURISCVState;
#if !defined(CONFIG_USER_ONLY)
#include "pmp.h"
@@ -113,7 +113,7 @@ FIELD(VTYPE, VMA, 7, 1)
FIELD(VTYPE, VEDIV, 8, 2)
FIELD(VTYPE, RESERVED, 10, sizeof(target_ulong) * 8 - 11)
-struct CPURISCVState {
+struct CPUArchState {
target_ulong gpr[32];
target_ulong gprh[32]; /* 64 top bits of the 128-bit registers */
uint64_t fpr[32]; /* assume both F and D extensions */
@@ -320,8 +320,7 @@ struct CPURISCVState {
uint64_t kvm_timer_frequency;
};
-OBJECT_DECLARE_TYPE(RISCVCPU, RISCVCPUClass,
- RISCV_CPU)
+OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
/**
* RISCVCPUClass:
@@ -362,8 +361,12 @@ struct RISCVCPUConfig {
bool ext_svinval;
bool ext_svnapot;
bool ext_svpbmt;
+ bool ext_zdinx;
bool ext_zfh;
bool ext_zfhmin;
+ bool ext_zfinx;
+ bool ext_zhinx;
+ bool ext_zhinxmin;
bool ext_zve32f;
bool ext_zve64f;
@@ -391,7 +394,7 @@ typedef struct RISCVCPUConfig RISCVCPUConfig;
*
* A RISCV CPU.
*/
-struct RISCVCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -495,8 +498,6 @@ void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong);
#define TB_FLAGS_MSTATUS_FS MSTATUS_FS
#define TB_FLAGS_MSTATUS_VS MSTATUS_VS
-typedef CPURISCVState CPUArchState;
-typedef RISCVCPU ArchCPU;
#include "exec/cpu-all.h"
FIELD(TB_FLAGS, MEM_IDX, 0, 3)
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 746335bfd6..1c60fb2e80 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -466,9 +466,13 @@ bool riscv_cpu_vector_enabled(CPURISCVState *env)
void riscv_cpu_swap_hypervisor_regs(CPURISCVState *env)
{
- uint64_t mstatus_mask = MSTATUS_MXR | MSTATUS_SUM | MSTATUS_FS |
+ uint64_t mstatus_mask = MSTATUS_MXR | MSTATUS_SUM |
MSTATUS_SPP | MSTATUS_SPIE | MSTATUS_SIE |
MSTATUS64_UXL | MSTATUS_VS;
+
+ if (riscv_has_ext(env, RVF)) {
+ mstatus_mask |= MSTATUS_FS;
+ }
bool current_virt = riscv_cpu_virt_enabled(env);
g_assert(riscv_has_ext(env, RVH));
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index a938760a3f..0606cd0ea8 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -23,6 +23,7 @@
#include "cpu.h"
#include "qemu/main-loop.h"
#include "exec/exec-all.h"
+#include "sysemu/cpu-timers.h"
/* CSR function table public API */
void riscv_get_csr_ops(int csrno, riscv_csr_operations *ops)
@@ -39,7 +40,8 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops)
static RISCVException fs(CPURISCVState *env, int csrno)
{
#if !defined(CONFIG_USER_ONLY)
- if (!env->debugger && !riscv_cpu_fp_enabled(env)) {
+ if (!env->debugger && !riscv_cpu_fp_enabled(env) &&
+ !RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) {
return RISCV_EXCP_ILLEGAL_INST;
}
#endif
@@ -302,7 +304,9 @@ static RISCVException write_fflags(CPURISCVState *env, int csrno,
target_ulong val)
{
#if !defined(CONFIG_USER_ONLY)
- env->mstatus |= MSTATUS_FS;
+ if (riscv_has_ext(env, RVF)) {
+ env->mstatus |= MSTATUS_FS;
+ }
#endif
riscv_cpu_set_fflags(env, val & (FSR_AEXC >> FSR_AEXC_SHIFT));
return RISCV_EXCP_NONE;
@@ -319,7 +323,9 @@ static RISCVException write_frm(CPURISCVState *env, int csrno,
target_ulong val)
{
#if !defined(CONFIG_USER_ONLY)
- env->mstatus |= MSTATUS_FS;
+ if (riscv_has_ext(env, RVF)) {
+ env->mstatus |= MSTATUS_FS;
+ }
#endif
env->frm = val & (FSR_RD >> FSR_RD_SHIFT);
return RISCV_EXCP_NONE;
@@ -337,7 +343,9 @@ static RISCVException write_fcsr(CPURISCVState *env, int csrno,
target_ulong val)
{
#if !defined(CONFIG_USER_ONLY)
- env->mstatus |= MSTATUS_FS;
+ if (riscv_has_ext(env, RVF)) {
+ env->mstatus |= MSTATUS_FS;
+ }
#endif
env->frm = (val & FSR_RD) >> FSR_RD_SHIFT;
riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT);
@@ -653,10 +661,14 @@ static RISCVException write_mstatus(CPURISCVState *env, int csrno,
tlb_flush(env_cpu(env));
}
mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
- MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM |
+ MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
MSTATUS_TW | MSTATUS_VS;
+ if (riscv_has_ext(env, RVF)) {
+ mask |= MSTATUS_FS;
+ }
+
if (xl != MXL_RV32 || env->debugger) {
/*
* RV32: MPV and GVA are not in mstatus. The current plan is to
@@ -788,6 +800,10 @@ static RISCVException write_misa(CPURISCVState *env, int csrno,
return RISCV_EXCP_NONE;
}
+ if (!(val & RVF)) {
+ env->mstatus &= ~MSTATUS_FS;
+ }
+
/* flush translation cache */
tb_flush(env_cpu(env));
env->misa_ext = val;
diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c
index 4a5982d594..5699c9517f 100644
--- a/target/riscv/fpu_helper.c
+++ b/target/riscv/fpu_helper.c
@@ -89,19 +89,21 @@ void helper_set_rod_rounding_mode(CPURISCVState *env)
static uint64_t do_fmadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2,
uint64_t rs3, int flags)
{
- float16 frs1 = check_nanbox_h(rs1);
- float16 frs2 = check_nanbox_h(rs2);
- float16 frs3 = check_nanbox_h(rs3);
- return nanbox_h(float16_muladd(frs1, frs2, frs3, flags, &env->fp_status));
+ float16 frs1 = check_nanbox_h(env, rs1);
+ float16 frs2 = check_nanbox_h(env, rs2);
+ float16 frs3 = check_nanbox_h(env, rs3);
+ return nanbox_h(env, float16_muladd(frs1, frs2, frs3, flags,
+ &env->fp_status));
}
static uint64_t do_fmadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2,
uint64_t rs3, int flags)
{
- float32 frs1 = check_nanbox_s(rs1);
- float32 frs2 = check_nanbox_s(rs2);
- float32 frs3 = check_nanbox_s(rs3);
- return nanbox_s(float32_muladd(frs1, frs2, frs3, flags, &env->fp_status));
+ float32 frs1 = check_nanbox_s(env, rs1);
+ float32 frs2 = check_nanbox_s(env, rs2);
+ float32 frs3 = check_nanbox_s(env, rs3);
+ return nanbox_s(env, float32_muladd(frs1, frs2, frs3, flags,
+ &env->fp_status));
}
uint64_t helper_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
@@ -183,124 +185,124 @@ uint64_t helper_fnmadd_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2,
uint64_t helper_fadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float32 frs1 = check_nanbox_s(rs1);
- float32 frs2 = check_nanbox_s(rs2);
- return nanbox_s(float32_add(frs1, frs2, &env->fp_status));
+ float32 frs1 = check_nanbox_s(env, rs1);
+ float32 frs2 = check_nanbox_s(env, rs2);
+ return nanbox_s(env, float32_add(frs1, frs2, &env->fp_status));
}
uint64_t helper_fsub_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float32 frs1 = check_nanbox_s(rs1);
- float32 frs2 = check_nanbox_s(rs2);
- return nanbox_s(float32_sub(frs1, frs2, &env->fp_status));
+ float32 frs1 = check_nanbox_s(env, rs1);
+ float32 frs2 = check_nanbox_s(env, rs2);
+ return nanbox_s(env, float32_sub(frs1, frs2, &env->fp_status));
}
uint64_t helper_fmul_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float32 frs1 = check_nanbox_s(rs1);
- float32 frs2 = check_nanbox_s(rs2);
- return nanbox_s(float32_mul(frs1, frs2, &env->fp_status));
+ float32 frs1 = check_nanbox_s(env, rs1);
+ float32 frs2 = check_nanbox_s(env, rs2);
+ return nanbox_s(env, float32_mul(frs1, frs2, &env->fp_status));
}
uint64_t helper_fdiv_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float32 frs1 = check_nanbox_s(rs1);
- float32 frs2 = check_nanbox_s(rs2);
- return nanbox_s(float32_div(frs1, frs2, &env->fp_status));
+ float32 frs1 = check_nanbox_s(env, rs1);
+ float32 frs2 = check_nanbox_s(env, rs2);
+ return nanbox_s(env, float32_div(frs1, frs2, &env->fp_status));
}
uint64_t helper_fmin_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float32 frs1 = check_nanbox_s(rs1);
- float32 frs2 = check_nanbox_s(rs2);
- return nanbox_s(env->priv_ver < PRIV_VERSION_1_11_0 ?
+ float32 frs1 = check_nanbox_s(env, rs1);
+ float32 frs2 = check_nanbox_s(env, rs2);
+ return nanbox_s(env, env->priv_ver < PRIV_VERSION_1_11_0 ?
float32_minnum(frs1, frs2, &env->fp_status) :
float32_minimum_number(frs1, frs2, &env->fp_status));
}
uint64_t helper_fmax_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float32 frs1 = check_nanbox_s(rs1);
- float32 frs2 = check_nanbox_s(rs2);
- return nanbox_s(env->priv_ver < PRIV_VERSION_1_11_0 ?
+ float32 frs1 = check_nanbox_s(env, rs1);
+ float32 frs2 = check_nanbox_s(env, rs2);
+ return nanbox_s(env, env->priv_ver < PRIV_VERSION_1_11_0 ?
float32_maxnum(frs1, frs2, &env->fp_status) :
float32_maximum_number(frs1, frs2, &env->fp_status));
}
uint64_t helper_fsqrt_s(CPURISCVState *env, uint64_t rs1)
{
- float32 frs1 = check_nanbox_s(rs1);
- return nanbox_s(float32_sqrt(frs1, &env->fp_status));
+ float32 frs1 = check_nanbox_s(env, rs1);
+ return nanbox_s(env, float32_sqrt(frs1, &env->fp_status));
}
target_ulong helper_fle_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float32 frs1 = check_nanbox_s(rs1);
- float32 frs2 = check_nanbox_s(rs2);
+ float32 frs1 = check_nanbox_s(env, rs1);
+ float32 frs2 = check_nanbox_s(env, rs2);
return float32_le(frs1, frs2, &env->fp_status);
}
target_ulong helper_flt_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float32 frs1 = check_nanbox_s(rs1);
- float32 frs2 = check_nanbox_s(rs2);
+ float32 frs1 = check_nanbox_s(env, rs1);
+ float32 frs2 = check_nanbox_s(env, rs2);
return float32_lt(frs1, frs2, &env->fp_status);
}
target_ulong helper_feq_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float32 frs1 = check_nanbox_s(rs1);
- float32 frs2 = check_nanbox_s(rs2);
+ float32 frs1 = check_nanbox_s(env, rs1);
+ float32 frs2 = check_nanbox_s(env, rs2);
return float32_eq_quiet(frs1, frs2, &env->fp_status);
}
target_ulong helper_fcvt_w_s(CPURISCVState *env, uint64_t rs1)
{
- float32 frs1 = check_nanbox_s(rs1);
+ float32 frs1 = check_nanbox_s(env, rs1);
return float32_to_int32(frs1, &env->fp_status);
}
target_ulong helper_fcvt_wu_s(CPURISCVState *env, uint64_t rs1)
{
- float32 frs1 = check_nanbox_s(rs1);
+ float32 frs1 = check_nanbox_s(env, rs1);
return (int32_t)float32_to_uint32(frs1, &env->fp_status);
}
target_ulong helper_fcvt_l_s(CPURISCVState *env, uint64_t rs1)
{
- float32 frs1 = check_nanbox_s(rs1);
+ float32 frs1 = check_nanbox_s(env, rs1);
return float32_to_int64(frs1, &env->fp_status);
}
target_ulong helper_fcvt_lu_s(CPURISCVState *env, uint64_t rs1)
{
- float32 frs1 = check_nanbox_s(rs1);
+ float32 frs1 = check_nanbox_s(env, rs1);
return float32_to_uint64(frs1, &env->fp_status);
}
uint64_t helper_fcvt_s_w(CPURISCVState *env, target_ulong rs1)
{
- return nanbox_s(int32_to_float32((int32_t)rs1, &env->fp_status));
+ return nanbox_s(env, int32_to_float32((int32_t)rs1, &env->fp_status));
}
uint64_t helper_fcvt_s_wu(CPURISCVState *env, target_ulong rs1)
{
- return nanbox_s(uint32_to_float32((uint32_t)rs1, &env->fp_status));
+ return nanbox_s(env, uint32_to_float32((uint32_t)rs1, &env->fp_status));
}
uint64_t helper_fcvt_s_l(CPURISCVState *env, target_ulong rs1)
{
- return nanbox_s(int64_to_float32(rs1, &env->fp_status));
+ return nanbox_s(env, int64_to_float32(rs1, &env->fp_status));
}
uint64_t helper_fcvt_s_lu(CPURISCVState *env, target_ulong rs1)
{
- return nanbox_s(uint64_to_float32(rs1, &env->fp_status));
+ return nanbox_s(env, uint64_to_float32(rs1, &env->fp_status));
}
-target_ulong helper_fclass_s(uint64_t rs1)
+target_ulong helper_fclass_s(CPURISCVState *env, uint64_t rs1)
{
- float32 frs1 = check_nanbox_s(rs1);
+ float32 frs1 = check_nanbox_s(env, rs1);
return fclass_s(frs1);
}
@@ -340,12 +342,12 @@ uint64_t helper_fmax_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2)
uint64_t helper_fcvt_s_d(CPURISCVState *env, uint64_t rs1)
{
- return nanbox_s(float64_to_float32(rs1, &env->fp_status));
+ return nanbox_s(env, float64_to_float32(rs1, &env->fp_status));
}
uint64_t helper_fcvt_d_s(CPURISCVState *env, uint64_t rs1)
{
- float32 frs1 = check_nanbox_s(rs1);
+ float32 frs1 = check_nanbox_s(env, rs1);
return float32_to_float64(frs1, &env->fp_status);
}
@@ -416,146 +418,146 @@ target_ulong helper_fclass_d(uint64_t frs1)
uint64_t helper_fadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float16 frs1 = check_nanbox_h(rs1);
- float16 frs2 = check_nanbox_h(rs2);
- return nanbox_h(float16_add(frs1, frs2, &env->fp_status));
+ float16 frs1 = check_nanbox_h(env, rs1);
+ float16 frs2 = check_nanbox_h(env, rs2);
+ return nanbox_h(env, float16_add(frs1, frs2, &env->fp_status));
}
uint64_t helper_fsub_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float16 frs1 = check_nanbox_h(rs1);
- float16 frs2 = check_nanbox_h(rs2);
- return nanbox_h(float16_sub(frs1, frs2, &env->fp_status));
+ float16 frs1 = check_nanbox_h(env, rs1);
+ float16 frs2 = check_nanbox_h(env, rs2);
+ return nanbox_h(env, float16_sub(frs1, frs2, &env->fp_status));
}
uint64_t helper_fmul_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float16 frs1 = check_nanbox_h(rs1);
- float16 frs2 = check_nanbox_h(rs2);
- return nanbox_h(float16_mul(frs1, frs2, &env->fp_status));
+ float16 frs1 = check_nanbox_h(env, rs1);
+ float16 frs2 = check_nanbox_h(env, rs2);
+ return nanbox_h(env, float16_mul(frs1, frs2, &env->fp_status));
}
uint64_t helper_fdiv_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float16 frs1 = check_nanbox_h(rs1);
- float16 frs2 = check_nanbox_h(rs2);
- return nanbox_h(float16_div(frs1, frs2, &env->fp_status));
+ float16 frs1 = check_nanbox_h(env, rs1);
+ float16 frs2 = check_nanbox_h(env, rs2);
+ return nanbox_h(env, float16_div(frs1, frs2, &env->fp_status));
}
uint64_t helper_fmin_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float16 frs1 = check_nanbox_h(rs1);
- float16 frs2 = check_nanbox_h(rs2);
- return nanbox_h(env->priv_ver < PRIV_VERSION_1_11_0 ?
+ float16 frs1 = check_nanbox_h(env, rs1);
+ float16 frs2 = check_nanbox_h(env, rs2);
+ return nanbox_h(env, env->priv_ver < PRIV_VERSION_1_11_0 ?
float16_minnum(frs1, frs2, &env->fp_status) :
float16_minimum_number(frs1, frs2, &env->fp_status));
}
uint64_t helper_fmax_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float16 frs1 = check_nanbox_h(rs1);
- float16 frs2 = check_nanbox_h(rs2);
- return nanbox_h(env->priv_ver < PRIV_VERSION_1_11_0 ?
+ float16 frs1 = check_nanbox_h(env, rs1);
+ float16 frs2 = check_nanbox_h(env, rs2);
+ return nanbox_h(env, env->priv_ver < PRIV_VERSION_1_11_0 ?
float16_maxnum(frs1, frs2, &env->fp_status) :
float16_maximum_number(frs1, frs2, &env->fp_status));
}
uint64_t helper_fsqrt_h(CPURISCVState *env, uint64_t rs1)
{
- float16 frs1 = check_nanbox_h(rs1);
- return nanbox_h(float16_sqrt(frs1, &env->fp_status));
+ float16 frs1 = check_nanbox_h(env, rs1);
+ return nanbox_h(env, float16_sqrt(frs1, &env->fp_status));
}
target_ulong helper_fle_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float16 frs1 = check_nanbox_h(rs1);
- float16 frs2 = check_nanbox_h(rs2);
+ float16 frs1 = check_nanbox_h(env, rs1);
+ float16 frs2 = check_nanbox_h(env, rs2);
return float16_le(frs1, frs2, &env->fp_status);
}
target_ulong helper_flt_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float16 frs1 = check_nanbox_h(rs1);
- float16 frs2 = check_nanbox_h(rs2);
+ float16 frs1 = check_nanbox_h(env, rs1);
+ float16 frs2 = check_nanbox_h(env, rs2);
return float16_lt(frs1, frs2, &env->fp_status);
}
target_ulong helper_feq_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2)
{
- float16 frs1 = check_nanbox_h(rs1);
- float16 frs2 = check_nanbox_h(rs2);
+ float16 frs1 = check_nanbox_h(env, rs1);
+ float16 frs2 = check_nanbox_h(env, rs2);
return float16_eq_quiet(frs1, frs2, &env->fp_status);
}
-target_ulong helper_fclass_h(uint64_t rs1)
+target_ulong helper_fclass_h(CPURISCVState *env, uint64_t rs1)
{
- float16 frs1 = check_nanbox_h(rs1);
+ float16 frs1 = check_nanbox_h(env, rs1);
return fclass_h(frs1);
}
target_ulong helper_fcvt_w_h(CPURISCVState *env, uint64_t rs1)
{
- float16 frs1 = check_nanbox_h(rs1);
+ float16 frs1 = check_nanbox_h(env, rs1);
return float16_to_int32(frs1, &env->fp_status);
}
target_ulong helper_fcvt_wu_h(CPURISCVState *env, uint64_t rs1)
{
- float16 frs1 = check_nanbox_h(rs1);
+ float16 frs1 = check_nanbox_h(env, rs1);
return (int32_t)float16_to_uint32(frs1, &env->fp_status);
}
target_ulong helper_fcvt_l_h(CPURISCVState *env, uint64_t rs1)
{
- float16 frs1 = check_nanbox_h(rs1);
+ float16 frs1 = check_nanbox_h(env, rs1);
return float16_to_int64(frs1, &env->fp_status);
}
target_ulong helper_fcvt_lu_h(CPURISCVState *env, uint64_t rs1)
{
- float16 frs1 = check_nanbox_h(rs1);
+ float16 frs1 = check_nanbox_h(env, rs1);
return float16_to_uint64(frs1, &env->fp_status);
}
uint64_t helper_fcvt_h_w(CPURISCVState *env, target_ulong rs1)
{
- return nanbox_h(int32_to_float16((int32_t)rs1, &env->fp_status));
+ return nanbox_h(env, int32_to_float16((int32_t)rs1, &env->fp_status));
}
uint64_t helper_fcvt_h_wu(CPURISCVState *env, target_ulong rs1)
{
- return nanbox_h(uint32_to_float16((uint32_t)rs1, &env->fp_status));
+ return nanbox_h(env, uint32_to_float16((uint32_t)rs1, &env->fp_status));
}
uint64_t helper_fcvt_h_l(CPURISCVState *env, target_ulong rs1)
{
- return nanbox_h(int64_to_float16(rs1, &env->fp_status));
+ return nanbox_h(env, int64_to_float16(rs1, &env->fp_status));
}
uint64_t helper_fcvt_h_lu(CPURISCVState *env, target_ulong rs1)
{
- return nanbox_h(uint64_to_float16(rs1, &env->fp_status));
+ return nanbox_h(env, uint64_to_float16(rs1, &env->fp_status));
}
uint64_t helper_fcvt_h_s(CPURISCVState *env, uint64_t rs1)
{
- float32 frs1 = check_nanbox_s(rs1);
- return nanbox_h(float32_to_float16(frs1, true, &env->fp_status));
+ float32 frs1 = check_nanbox_s(env, rs1);
+ return nanbox_h(env, float32_to_float16(frs1, true, &env->fp_status));
}
uint64_t helper_fcvt_s_h(CPURISCVState *env, uint64_t rs1)
{
- float16 frs1 = check_nanbox_h(rs1);
- return nanbox_s(float16_to_float32(frs1, true, &env->fp_status));
+ float16 frs1 = check_nanbox_h(env, rs1);
+ return nanbox_s(env, float16_to_float32(frs1, true, &env->fp_status));
}
uint64_t helper_fcvt_h_d(CPURISCVState *env, uint64_t rs1)
{
- return nanbox_h(float64_to_float16(rs1, true, &env->fp_status));
+ return nanbox_h(env, float64_to_float16(rs1, true, &env->fp_status));
}
uint64_t helper_fcvt_d_h(CPURISCVState *env, uint64_t rs1)
{
- float16 frs1 = check_nanbox_h(rs1);
+ float16 frs1 = check_nanbox_h(env, rs1);
return float16_to_float64(frs1, true, &env->fp_status);
}
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 72cc2582f4..26bbab2fab 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -38,7 +38,7 @@ DEF_HELPER_FLAGS_2(fcvt_s_w, TCG_CALL_NO_RWG, i64, env, tl)
DEF_HELPER_FLAGS_2(fcvt_s_wu, TCG_CALL_NO_RWG, i64, env, tl)
DEF_HELPER_FLAGS_2(fcvt_s_l, TCG_CALL_NO_RWG, i64, env, tl)
DEF_HELPER_FLAGS_2(fcvt_s_lu, TCG_CALL_NO_RWG, i64, env, tl)
-DEF_HELPER_FLAGS_1(fclass_s, TCG_CALL_NO_RWG_SE, tl, i64)
+DEF_HELPER_FLAGS_2(fclass_s, TCG_CALL_NO_RWG_SE, tl, env, i64)
/* Floating Point - Double Precision */
DEF_HELPER_FLAGS_3(fadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64)
@@ -90,7 +90,7 @@ DEF_HELPER_FLAGS_2(fcvt_h_w, TCG_CALL_NO_RWG, i64, env, tl)
DEF_HELPER_FLAGS_2(fcvt_h_wu, TCG_CALL_NO_RWG, i64, env, tl)
DEF_HELPER_FLAGS_2(fcvt_h_l, TCG_CALL_NO_RWG, i64, env, tl)
DEF_HELPER_FLAGS_2(fcvt_h_lu, TCG_CALL_NO_RWG, i64, env, tl)
-DEF_HELPER_FLAGS_1(fclass_h, TCG_CALL_NO_RWG_SE, tl, i64)
+DEF_HELPER_FLAGS_2(fclass_h, TCG_CALL_NO_RWG_SE, tl, env, i64)
/* Special functions */
DEF_HELPER_2(csrr, tl, env, int)
diff --git a/target/riscv/insn_trans/trans_rvb.c.inc b/target/riscv/insn_trans/trans_rvb.c.inc
index f9bd3b7ec4..e8519a6d69 100644
--- a/target/riscv/insn_trans/trans_rvb.c.inc
+++ b/target/riscv/insn_trans/trans_rvb.c.inc
@@ -19,25 +19,25 @@
*/
#define REQUIRE_ZBA(ctx) do { \
- if (ctx->cfg_ptr->ext_zba) { \
+ if (!ctx->cfg_ptr->ext_zba) { \
return false; \
} \
} while (0)
#define REQUIRE_ZBB(ctx) do { \
- if (ctx->cfg_ptr->ext_zbb) { \
+ if (!ctx->cfg_ptr->ext_zbb) { \
return false; \
} \
} while (0)
#define REQUIRE_ZBC(ctx) do { \
- if (ctx->cfg_ptr->ext_zbc) { \
+ if (!ctx->cfg_ptr->ext_zbc) { \
return false; \
} \
} while (0)
#define REQUIRE_ZBS(ctx) do { \
- if (ctx->cfg_ptr->ext_zbs) { \
+ if (!ctx->cfg_ptr->ext_zbs) { \
return false; \
} \
} while (0)
diff --git a/target/riscv/insn_trans/trans_rvd.c.inc b/target/riscv/insn_trans/trans_rvd.c.inc
index 091ed3a8ad..1397c1ce1c 100644
--- a/target/riscv/insn_trans/trans_rvd.c.inc
+++ b/target/riscv/insn_trans/trans_rvd.c.inc
@@ -18,6 +18,19 @@
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#define REQUIRE_ZDINX_OR_D(ctx) do { \
+ if (!ctx->cfg_ptr->ext_zdinx) { \
+ REQUIRE_EXT(ctx, RVD); \
+ } \
+} while (0)
+
+#define REQUIRE_EVEN(ctx, reg) do { \
+ if (ctx->cfg_ptr->ext_zdinx && (get_xl(ctx) == MXL_RV32) && \
+ ((reg) & 0x1)) { \
+ return false; \
+ } \
+} while (0)
+
static bool trans_fld(DisasContext *ctx, arg_fld *a)
{
TCGv addr;
@@ -47,10 +60,17 @@ static bool trans_fsd(DisasContext *ctx, arg_fsd *a)
static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2 | a->rs3);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+ TCGv_i64 src3 = get_fpr_d(ctx, a->rs3);
+
gen_set_rm(ctx, a->rm);
- gen_helper_fmadd_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ gen_helper_fmadd_d(dest, cpu_env, src1, src2, src3);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -58,10 +78,17 @@ static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a)
static bool trans_fmsub_d(DisasContext *ctx, arg_fmsub_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2 | a->rs3);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+ TCGv_i64 src3 = get_fpr_d(ctx, a->rs3);
+
gen_set_rm(ctx, a->rm);
- gen_helper_fmsub_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ gen_helper_fmsub_d(dest, cpu_env, src1, src2, src3);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -69,10 +96,17 @@ static bool trans_fmsub_d(DisasContext *ctx, arg_fmsub_d *a)
static bool trans_fnmsub_d(DisasContext *ctx, arg_fnmsub_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2 | a->rs3);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+ TCGv_i64 src3 = get_fpr_d(ctx, a->rs3);
+
gen_set_rm(ctx, a->rm);
- gen_helper_fnmsub_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ gen_helper_fnmsub_d(dest, cpu_env, src1, src2, src3);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -80,10 +114,17 @@ static bool trans_fnmsub_d(DisasContext *ctx, arg_fnmsub_d *a)
static bool trans_fnmadd_d(DisasContext *ctx, arg_fnmadd_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2 | a->rs3);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+ TCGv_i64 src3 = get_fpr_d(ctx, a->rs3);
+
gen_set_rm(ctx, a->rm);
- gen_helper_fnmadd_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ gen_helper_fnmadd_d(dest, cpu_env, src1, src2, src3);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -91,12 +132,16 @@ static bool trans_fnmadd_d(DisasContext *ctx, arg_fnmadd_d *a)
static bool trans_fadd_d(DisasContext *ctx, arg_fadd_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2);
- gen_set_rm(ctx, a->rm);
- gen_helper_fadd_d(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fadd_d(dest, cpu_env, src1, src2);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -104,12 +149,16 @@ static bool trans_fadd_d(DisasContext *ctx, arg_fadd_d *a)
static bool trans_fsub_d(DisasContext *ctx, arg_fsub_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2);
- gen_set_rm(ctx, a->rm);
- gen_helper_fsub_d(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fsub_d(dest, cpu_env, src1, src2);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -117,12 +166,16 @@ static bool trans_fsub_d(DisasContext *ctx, arg_fsub_d *a)
static bool trans_fmul_d(DisasContext *ctx, arg_fmul_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2);
- gen_set_rm(ctx, a->rm);
- gen_helper_fmul_d(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fmul_d(dest, cpu_env, src1, src2);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -130,12 +183,16 @@ static bool trans_fmul_d(DisasContext *ctx, arg_fmul_d *a)
static bool trans_fdiv_d(DisasContext *ctx, arg_fdiv_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2);
- gen_set_rm(ctx, a->rm);
- gen_helper_fdiv_d(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fdiv_d(dest, cpu_env, src1, src2);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -143,23 +200,34 @@ static bool trans_fdiv_d(DisasContext *ctx, arg_fdiv_d *a)
static bool trans_fsqrt_d(DisasContext *ctx, arg_fsqrt_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1);
- gen_set_rm(ctx, a->rm);
- gen_helper_fsqrt_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fsqrt_d(dest, cpu_env, src1);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a)
{
+ REQUIRE_FPU;
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
if (a->rs1 == a->rs2) { /* FMOV */
- tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+ dest = get_fpr_d(ctx, a->rs1);
} else {
- tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2],
- cpu_fpr[a->rs1], 0, 63);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+ tcg_gen_deposit_i64(dest, src2, src1, 0, 63);
}
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -167,15 +235,22 @@ static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a)
static bool trans_fsgnjn_d(DisasContext *ctx, arg_fsgnjn_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+
if (a->rs1 == a->rs2) { /* FNEG */
- tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT64_MIN);
+ tcg_gen_xori_i64(dest, src1, INT64_MIN);
} else {
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
TCGv_i64 t0 = tcg_temp_new_i64();
- tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
- tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 63);
+ tcg_gen_not_i64(t0, src2);
+ tcg_gen_deposit_i64(dest, t0, src1, 0, 63);
tcg_temp_free_i64(t0);
}
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -183,15 +258,22 @@ static bool trans_fsgnjn_d(DisasContext *ctx, arg_fsgnjn_d *a)
static bool trans_fsgnjx_d(DisasContext *ctx, arg_fsgnjx_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+
if (a->rs1 == a->rs2) { /* FABS */
- tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT64_MIN);
+ tcg_gen_andi_i64(dest, src1, ~INT64_MIN);
} else {
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
TCGv_i64 t0 = tcg_temp_new_i64();
- tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT64_MIN);
- tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);
+ tcg_gen_andi_i64(t0, src2, INT64_MIN);
+ tcg_gen_xor_i64(dest, src1, t0);
tcg_temp_free_i64(t0);
}
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -199,11 +281,15 @@ static bool trans_fsgnjx_d(DisasContext *ctx, arg_fsgnjx_d *a)
static bool trans_fmin_d(DisasContext *ctx, arg_fmin_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2);
- gen_helper_fmin_d(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+ gen_helper_fmin_d(dest, cpu_env, src1, src2);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -211,11 +297,15 @@ static bool trans_fmin_d(DisasContext *ctx, arg_fmin_d *a)
static bool trans_fmax_d(DisasContext *ctx, arg_fmax_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2);
- gen_helper_fmax_d(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
+ gen_helper_fmax_d(dest, cpu_env, src1, src2);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -223,11 +313,15 @@ static bool trans_fmax_d(DisasContext *ctx, arg_fmax_d *a)
static bool trans_fcvt_s_d(DisasContext *ctx, arg_fcvt_s_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rs1);
- gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_s_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_s_d(dest, cpu_env, src1);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -235,11 +329,15 @@ static bool trans_fcvt_s_d(DisasContext *ctx, arg_fcvt_s_d *a)
static bool trans_fcvt_d_s(DisasContext *ctx, arg_fcvt_d_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd);
- gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_d_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_d_s(dest, cpu_env, src1);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -247,11 +345,14 @@ static bool trans_fcvt_d_s(DisasContext *ctx, arg_fcvt_d_s *a)
static bool trans_feq_d(DisasContext *ctx, arg_feq_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rs1 | a->rs2);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
- gen_helper_feq_d(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_feq_d(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -259,11 +360,14 @@ static bool trans_feq_d(DisasContext *ctx, arg_feq_d *a)
static bool trans_flt_d(DisasContext *ctx, arg_flt_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rs1 | a->rs2);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
- gen_helper_flt_d(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_flt_d(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -271,11 +375,14 @@ static bool trans_flt_d(DisasContext *ctx, arg_flt_d *a)
static bool trans_fle_d(DisasContext *ctx, arg_fle_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rs1 | a->rs2);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_d(ctx, a->rs2);
- gen_helper_fle_d(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_fle_d(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -283,11 +390,13 @@ static bool trans_fle_d(DisasContext *ctx, arg_fle_d *a)
static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rs1);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
- gen_helper_fclass_d(dest, cpu_fpr[a->rs1]);
+ gen_helper_fclass_d(dest, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -295,12 +404,14 @@ static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a)
static bool trans_fcvt_w_d(DisasContext *ctx, arg_fcvt_w_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rs1);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_w_d(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_w_d(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -308,12 +419,14 @@ static bool trans_fcvt_w_d(DisasContext *ctx, arg_fcvt_w_d *a)
static bool trans_fcvt_wu_d(DisasContext *ctx, arg_fcvt_wu_d *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rs1);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_wu_d(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_wu_d(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -321,12 +434,15 @@ static bool trans_fcvt_wu_d(DisasContext *ctx, arg_fcvt_wu_d *a)
static bool trans_fcvt_d_w(DisasContext *ctx, arg_fcvt_d_w *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_d_w(cpu_fpr[a->rd], cpu_env, src);
+ gen_helper_fcvt_d_w(dest, cpu_env, src);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
@@ -335,12 +451,15 @@ static bool trans_fcvt_d_w(DisasContext *ctx, arg_fcvt_d_w *a)
static bool trans_fcvt_d_wu(DisasContext *ctx, arg_fcvt_d_wu *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_d_wu(cpu_fpr[a->rd], cpu_env, src);
+ gen_helper_fcvt_d_wu(dest, cpu_env, src);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
@@ -350,12 +469,14 @@ static bool trans_fcvt_l_d(DisasContext *ctx, arg_fcvt_l_d *a)
{
REQUIRE_64BIT(ctx);
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rs1);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_l_d(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_l_d(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -364,12 +485,14 @@ static bool trans_fcvt_lu_d(DisasContext *ctx, arg_fcvt_lu_d *a)
{
REQUIRE_64BIT(ctx);
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rs1);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_lu_d(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_lu_d(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -392,12 +515,15 @@ static bool trans_fcvt_d_l(DisasContext *ctx, arg_fcvt_d_l *a)
{
REQUIRE_64BIT(ctx);
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_d_l(cpu_fpr[a->rd], cpu_env, src);
+ gen_helper_fcvt_d_l(dest, cpu_env, src);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
@@ -407,12 +533,15 @@ static bool trans_fcvt_d_lu(DisasContext *ctx, arg_fcvt_d_lu *a)
{
REQUIRE_64BIT(ctx);
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZDINX_OR_D(ctx);
+ REQUIRE_EVEN(ctx, a->rd);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_d_lu(cpu_fpr[a->rd], cpu_env, src);
+ gen_helper_fcvt_d_lu(dest, cpu_env, src);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
diff --git a/target/riscv/insn_trans/trans_rvf.c.inc b/target/riscv/insn_trans/trans_rvf.c.inc
index 0aac87f7db..a1d3eb52ad 100644
--- a/target/riscv/insn_trans/trans_rvf.c.inc
+++ b/target/riscv/insn_trans/trans_rvf.c.inc
@@ -20,7 +20,14 @@
#define REQUIRE_FPU do {\
if (ctx->mstatus_fs == 0) \
- return false; \
+ if (!ctx->cfg_ptr->ext_zfinx) \
+ return false; \
+} while (0)
+
+#define REQUIRE_ZFINX_OR_F(ctx) do {\
+ if (!ctx->cfg_ptr->ext_zfinx) { \
+ REQUIRE_EXT(ctx, RVF); \
+ } \
} while (0)
static bool trans_flw(DisasContext *ctx, arg_flw *a)
@@ -55,10 +62,16 @@ static bool trans_fsw(DisasContext *ctx, arg_fsw *a)
static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+ TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3);
+
gen_set_rm(ctx, a->rm);
- gen_helper_fmadd_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ gen_helper_fmadd_s(dest, cpu_env, src1, src2, src3);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -66,10 +79,16 @@ static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a)
static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+ TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3);
+
gen_set_rm(ctx, a->rm);
- gen_helper_fmsub_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ gen_helper_fmsub_s(dest, cpu_env, src1, src2, src3);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -77,10 +96,16 @@ static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a)
static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+ TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3);
+
gen_set_rm(ctx, a->rm);
- gen_helper_fnmsub_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ gen_helper_fnmsub_s(dest, cpu_env, src1, src2, src3);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -88,10 +113,16 @@ static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a)
static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+ TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3);
+
gen_set_rm(ctx, a->rm);
- gen_helper_fnmadd_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ gen_helper_fnmadd_s(dest, cpu_env, src1, src2, src3);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -99,11 +130,15 @@ static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a)
static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_set_rm(ctx, a->rm);
- gen_helper_fadd_s(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_fadd_s(dest, cpu_env, src1, src2);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -111,11 +146,15 @@ static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a)
static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_set_rm(ctx, a->rm);
- gen_helper_fsub_s(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_fsub_s(dest, cpu_env, src1, src2);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -123,11 +162,15 @@ static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a)
static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_set_rm(ctx, a->rm);
- gen_helper_fmul_s(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_fmul_s(dest, cpu_env, src1, src2);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -135,11 +178,15 @@ static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a)
static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_set_rm(ctx, a->rm);
- gen_helper_fdiv_s(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_fdiv_s(dest, cpu_env, src1, src2);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -147,10 +194,14 @@ static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a)
static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fsqrt_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fsqrt_s(dest, cpu_env, src1);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -158,22 +209,37 @@ static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a)
static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
if (a->rs1 == a->rs2) { /* FMOV */
- gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ gen_check_nanbox_s(dest, src1);
+ } else {
+ tcg_gen_ext32s_i64(dest, src1);
+ }
} else { /* FSGNJ */
- TCGv_i64 rs1 = tcg_temp_new_i64();
- TCGv_i64 rs2 = tcg_temp_new_i64();
-
- gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
- gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
-
- /* This formulation retains the nanboxing of rs2. */
- tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
- tcg_temp_free_i64(rs1);
- tcg_temp_free_i64(rs2);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ TCGv_i64 rs1 = tcg_temp_new_i64();
+ TCGv_i64 rs2 = tcg_temp_new_i64();
+ gen_check_nanbox_s(rs1, src1);
+ gen_check_nanbox_s(rs2, src2);
+
+ /* This formulation retains the nanboxing of rs2 in normal 'F'. */
+ tcg_gen_deposit_i64(dest, rs2, rs1, 0, 31);
+
+ tcg_temp_free_i64(rs1);
+ tcg_temp_free_i64(rs2);
+ } else {
+ tcg_gen_deposit_i64(dest, src2, src1, 0, 31);
+ tcg_gen_ext32s_i64(dest, dest);
+ }
}
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -183,16 +249,27 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
TCGv_i64 rs1, rs2, mask;
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
- rs1 = tcg_temp_new_i64();
- gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ rs1 = tcg_temp_new_i64();
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ gen_check_nanbox_s(rs1, src1);
+ } else {
+ tcg_gen_mov_i64(rs1, src1);
+ }
if (a->rs1 == a->rs2) { /* FNEG */
- tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));
+ tcg_gen_xori_i64(dest, rs1, MAKE_64BIT_MASK(31, 1));
} else {
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
rs2 = tcg_temp_new_i64();
- gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ gen_check_nanbox_s(rs2, src2);
+ } else {
+ tcg_gen_mov_i64(rs2, src2);
+ }
/*
* Replace bit 31 in rs1 with inverse in rs2.
@@ -200,13 +277,17 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
*/
mask = tcg_constant_i64(~MAKE_64BIT_MASK(31, 1));
tcg_gen_nor_i64(rs2, rs2, mask);
- tcg_gen_and_i64(rs1, mask, rs1);
- tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);
+ tcg_gen_and_i64(dest, mask, rs1);
+ tcg_gen_or_i64(dest, dest, rs2);
tcg_temp_free_i64(rs2);
}
+ /* sign-extend the result instead of nanboxing it when zfinx is enabled */
+ if (ctx->cfg_ptr->ext_zfinx) {
+ tcg_gen_ext32s_i64(dest, dest);
+ }
+ gen_set_fpr_hs(ctx, a->rd, dest);
tcg_temp_free_i64(rs1);
-
mark_fs_dirty(ctx);
return true;
}
@@ -216,28 +297,45 @@ static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)
TCGv_i64 rs1, rs2;
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
rs1 = tcg_temp_new_i64();
- gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ gen_check_nanbox_s(rs1, src1);
+ } else {
+ tcg_gen_mov_i64(rs1, src1);
+ }
if (a->rs1 == a->rs2) { /* FABS */
- tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1));
+ tcg_gen_andi_i64(dest, rs1, ~MAKE_64BIT_MASK(31, 1));
} else {
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
rs2 = tcg_temp_new_i64();
- gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ gen_check_nanbox_s(rs2, src2);
+ } else {
+ tcg_gen_mov_i64(rs2, src2);
+ }
/*
* Xor bit 31 in rs1 with that in rs2.
* This formulation retains the nanboxing of rs1.
*/
- tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));
- tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);
+ tcg_gen_andi_i64(dest, rs2, MAKE_64BIT_MASK(31, 1));
+ tcg_gen_xor_i64(dest, rs1, dest);
tcg_temp_free_i64(rs2);
}
+ /* sign-extend the result instead of nanboxing it when zfinx is enabled */
+ if (ctx->cfg_ptr->ext_zfinx) {
+ tcg_gen_ext32s_i64(dest, dest);
+ }
tcg_temp_free_i64(rs1);
-
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -245,10 +343,14 @@ static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)
static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
- gen_helper_fmin_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2]);
+ gen_helper_fmin_s(dest, cpu_env, src1, src2);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -256,10 +358,14 @@ static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a)
static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
- gen_helper_fmax_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2]);
+ gen_helper_fmax_s(dest, cpu_env, src1, src2);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -267,12 +373,13 @@ static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a)
static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_w_s(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_w_s(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -280,12 +387,13 @@ static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a)
static bool trans_fcvt_wu_s(DisasContext *ctx, arg_fcvt_wu_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_wu_s(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_wu_s(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -294,14 +402,14 @@ static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a)
{
/* NOTE: This was FMV.X.S in an earlier version of the ISA spec! */
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
-
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
#if defined(TARGET_RISCV64)
- tcg_gen_ext32s_tl(dest, cpu_fpr[a->rs1]);
+ tcg_gen_ext32s_tl(dest, src1);
#else
- tcg_gen_extrl_i64_i32(dest, cpu_fpr[a->rs1]);
+ tcg_gen_extrl_i64_i32(dest, src1);
#endif
gen_set_gpr(ctx, a->rd, dest);
@@ -311,11 +419,13 @@ static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a)
static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
- gen_helper_feq_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_feq_s(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -323,11 +433,13 @@ static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a)
static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
- gen_helper_flt_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_flt_s(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -335,11 +447,13 @@ static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a)
static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
- gen_helper_fle_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_fle_s(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -347,11 +461,12 @@ static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a)
static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
- gen_helper_fclass_s(dest, cpu_fpr[a->rs1]);
+ gen_helper_fclass_s(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -359,13 +474,14 @@ static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a)
static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_s_w(cpu_fpr[a->rd], cpu_env, src);
-
+ gen_helper_fcvt_s_w(dest, cpu_env, src);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -373,13 +489,14 @@ static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a)
static bool trans_fcvt_s_wu(DisasContext *ctx, arg_fcvt_s_wu *a)
{
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_s_wu(cpu_fpr[a->rd], cpu_env, src);
-
+ gen_helper_fcvt_s_wu(dest, cpu_env, src);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -388,13 +505,14 @@ static bool trans_fmv_w_x(DisasContext *ctx, arg_fmv_w_x *a)
{
/* NOTE: This was FMV.S.X in an earlier version of the ISA spec! */
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO);
- tcg_gen_extu_tl_i64(cpu_fpr[a->rd], src);
- gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
-
+ tcg_gen_extu_tl_i64(dest, src);
+ gen_nanbox_s(dest, dest);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -403,12 +521,13 @@ static bool trans_fcvt_l_s(DisasContext *ctx, arg_fcvt_l_s *a)
{
REQUIRE_64BIT(ctx);
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_l_s(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_l_s(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -417,12 +536,13 @@ static bool trans_fcvt_lu_s(DisasContext *ctx, arg_fcvt_lu_s *a)
{
REQUIRE_64BIT(ctx);
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_lu_s(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_lu_s(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -431,13 +551,14 @@ static bool trans_fcvt_s_l(DisasContext *ctx, arg_fcvt_s_l *a)
{
REQUIRE_64BIT(ctx);
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_s_l(cpu_fpr[a->rd], cpu_env, src);
-
+ gen_helper_fcvt_s_l(dest, cpu_env, src);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -446,13 +567,14 @@ static bool trans_fcvt_s_lu(DisasContext *ctx, arg_fcvt_s_lu *a)
{
REQUIRE_64BIT(ctx);
REQUIRE_FPU;
- REQUIRE_EXT(ctx, RVF);
+ REQUIRE_ZFINX_OR_F(ctx);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_s_lu(cpu_fpr[a->rd], cpu_env, src);
-
+ gen_helper_fcvt_s_lu(dest, cpu_env, src);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
diff --git a/target/riscv/insn_trans/trans_rvzfh.c.inc b/target/riscv/insn_trans/trans_rvzfh.c.inc
index 608c51da2c..5d07150cd0 100644
--- a/target/riscv/insn_trans/trans_rvzfh.c.inc
+++ b/target/riscv/insn_trans/trans_rvzfh.c.inc
@@ -22,12 +22,25 @@
} \
} while (0)
+#define REQUIRE_ZHINX_OR_ZFH(ctx) do { \
+ if (!ctx->cfg_ptr->ext_zhinx && !ctx->cfg_ptr->ext_zfh) { \
+ return false; \
+ } \
+} while (0)
+
#define REQUIRE_ZFH_OR_ZFHMIN(ctx) do { \
if (!(ctx->cfg_ptr->ext_zfh || ctx->cfg_ptr->ext_zfhmin)) { \
return false; \
} \
} while (0)
+#define REQUIRE_ZFH_OR_ZFHMIN_OR_ZHINX_OR_ZHINXMIN(ctx) do { \
+ if (!(ctx->cfg_ptr->ext_zfh || ctx->cfg_ptr->ext_zfhmin || \
+ ctx->cfg_ptr->ext_zhinx || ctx->cfg_ptr->ext_zhinxmin)) { \
+ return false; \
+ } \
+} while (0)
+
static bool trans_flh(DisasContext *ctx, arg_flh *a)
{
TCGv_i64 dest;
@@ -73,11 +86,16 @@ static bool trans_fsh(DisasContext *ctx, arg_fsh *a)
static bool trans_fmadd_h(DisasContext *ctx, arg_fmadd_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+ TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3);
gen_set_rm(ctx, a->rm);
- gen_helper_fmadd_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ gen_helper_fmadd_h(dest, cpu_env, src1, src2, src3);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -85,11 +103,16 @@ static bool trans_fmadd_h(DisasContext *ctx, arg_fmadd_h *a)
static bool trans_fmsub_h(DisasContext *ctx, arg_fmsub_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+ TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3);
gen_set_rm(ctx, a->rm);
- gen_helper_fmsub_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ gen_helper_fmsub_h(dest, cpu_env, src1, src2, src3);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -97,11 +120,16 @@ static bool trans_fmsub_h(DisasContext *ctx, arg_fmsub_h *a)
static bool trans_fnmsub_h(DisasContext *ctx, arg_fnmsub_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+ TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3);
gen_set_rm(ctx, a->rm);
- gen_helper_fnmsub_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ gen_helper_fnmsub_h(dest, cpu_env, src1, src2, src3);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -109,11 +137,16 @@ static bool trans_fnmsub_h(DisasContext *ctx, arg_fnmsub_h *a)
static bool trans_fnmadd_h(DisasContext *ctx, arg_fnmadd_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+ TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3);
gen_set_rm(ctx, a->rm);
- gen_helper_fnmadd_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ gen_helper_fnmadd_h(dest, cpu_env, src1, src2, src3);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -121,11 +154,15 @@ static bool trans_fnmadd_h(DisasContext *ctx, arg_fnmadd_h *a)
static bool trans_fadd_h(DisasContext *ctx, arg_fadd_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_set_rm(ctx, a->rm);
- gen_helper_fadd_h(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_fadd_h(dest, cpu_env, src1, src2);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -133,11 +170,15 @@ static bool trans_fadd_h(DisasContext *ctx, arg_fadd_h *a)
static bool trans_fsub_h(DisasContext *ctx, arg_fsub_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_set_rm(ctx, a->rm);
- gen_helper_fsub_h(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_fsub_h(dest, cpu_env, src1, src2);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -145,11 +186,15 @@ static bool trans_fsub_h(DisasContext *ctx, arg_fsub_h *a)
static bool trans_fmul_h(DisasContext *ctx, arg_fmul_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_set_rm(ctx, a->rm);
- gen_helper_fmul_h(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_fmul_h(dest, cpu_env, src1, src2);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -157,11 +202,15 @@ static bool trans_fmul_h(DisasContext *ctx, arg_fmul_h *a)
static bool trans_fdiv_h(DisasContext *ctx, arg_fdiv_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
gen_set_rm(ctx, a->rm);
- gen_helper_fdiv_h(cpu_fpr[a->rd], cpu_env,
- cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_fdiv_h(dest, cpu_env, src1, src2);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -169,10 +218,14 @@ static bool trans_fdiv_h(DisasContext *ctx, arg_fdiv_h *a)
static bool trans_fsqrt_h(DisasContext *ctx, arg_fsqrt_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fsqrt_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fsqrt_h(dest, cpu_env, src1);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -180,23 +233,37 @@ static bool trans_fsqrt_h(DisasContext *ctx, arg_fsqrt_h *a)
static bool trans_fsgnj_h(DisasContext *ctx, arg_fsgnj_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
if (a->rs1 == a->rs2) { /* FMOV */
- gen_check_nanbox_h(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ gen_check_nanbox_h(dest, src1);
+ } else {
+ tcg_gen_ext16s_i64(dest, src1);
+ }
} else {
- TCGv_i64 rs1 = tcg_temp_new_i64();
- TCGv_i64 rs2 = tcg_temp_new_i64();
-
- gen_check_nanbox_h(rs1, cpu_fpr[a->rs1]);
- gen_check_nanbox_h(rs2, cpu_fpr[a->rs2]);
-
- /* This formulation retains the nanboxing of rs2. */
- tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 15);
- tcg_temp_free_i64(rs1);
- tcg_temp_free_i64(rs2);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ TCGv_i64 rs1 = tcg_temp_new_i64();
+ TCGv_i64 rs2 = tcg_temp_new_i64();
+ gen_check_nanbox_h(rs1, src1);
+ gen_check_nanbox_h(rs2, src2);
+
+ /* This formulation retains the nanboxing of rs2 in normal 'Zfh'. */
+ tcg_gen_deposit_i64(dest, rs2, rs1, 0, 15);
+
+ tcg_temp_free_i64(rs1);
+ tcg_temp_free_i64(rs2);
+ } else {
+ tcg_gen_deposit_i64(dest, src2, src1, 0, 15);
+ tcg_gen_ext16s_i64(dest, dest);
+ }
}
-
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -206,16 +273,29 @@ static bool trans_fsgnjn_h(DisasContext *ctx, arg_fsgnjn_h *a)
TCGv_i64 rs1, rs2, mask;
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
rs1 = tcg_temp_new_i64();
- gen_check_nanbox_h(rs1, cpu_fpr[a->rs1]);
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ gen_check_nanbox_h(rs1, src1);
+ } else {
+ tcg_gen_mov_i64(rs1, src1);
+ }
if (a->rs1 == a->rs2) { /* FNEG */
- tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(15, 1));
+ tcg_gen_xori_i64(dest, rs1, MAKE_64BIT_MASK(15, 1));
} else {
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
rs2 = tcg_temp_new_i64();
- gen_check_nanbox_h(rs2, cpu_fpr[a->rs2]);
+
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ gen_check_nanbox_h(rs2, src2);
+ } else {
+ tcg_gen_mov_i64(rs2, src2);
+ }
/*
* Replace bit 15 in rs1 with inverse in rs2.
@@ -224,12 +304,17 @@ static bool trans_fsgnjn_h(DisasContext *ctx, arg_fsgnjn_h *a)
mask = tcg_const_i64(~MAKE_64BIT_MASK(15, 1));
tcg_gen_not_i64(rs2, rs2);
tcg_gen_andc_i64(rs2, rs2, mask);
- tcg_gen_and_i64(rs1, mask, rs1);
- tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);
+ tcg_gen_and_i64(dest, mask, rs1);
+ tcg_gen_or_i64(dest, dest, rs2);
tcg_temp_free_i64(mask);
tcg_temp_free_i64(rs2);
}
+ /* sign-extend instead of NaN-boxing the result when zfinx is enabled */
+ if (ctx->cfg_ptr->ext_zfinx) {
+ tcg_gen_ext16s_i64(dest, dest);
+ }
+ tcg_temp_free_i64(rs1);
mark_fs_dirty(ctx);
return true;
}
@@ -239,27 +324,44 @@ static bool trans_fsgnjx_h(DisasContext *ctx, arg_fsgnjx_h *a)
TCGv_i64 rs1, rs2;
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
rs1 = tcg_temp_new_i64();
- gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ gen_check_nanbox_h(rs1, src1);
+ } else {
+ tcg_gen_mov_i64(rs1, src1);
+ }
if (a->rs1 == a->rs2) { /* FABS */
- tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(15, 1));
+ tcg_gen_andi_i64(dest, rs1, ~MAKE_64BIT_MASK(15, 1));
} else {
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
rs2 = tcg_temp_new_i64();
- gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ gen_check_nanbox_h(rs2, src2);
+ } else {
+ tcg_gen_mov_i64(rs2, src2);
+ }
/*
* Xor bit 15 in rs1 with that in rs2.
* This formulation retains the nanboxing of rs1.
*/
- tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(15, 1));
- tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);
+ tcg_gen_andi_i64(dest, rs2, MAKE_64BIT_MASK(15, 1));
+ tcg_gen_xor_i64(dest, rs1, dest);
tcg_temp_free_i64(rs2);
}
-
+ /* sign-extend instead of NaN-boxing the result when zfinx is enabled */
+ if (ctx->cfg_ptr->ext_zfinx) {
+ tcg_gen_ext16s_i64(dest, dest);
+ }
+ tcg_temp_free_i64(rs1);
mark_fs_dirty(ctx);
return true;
}
@@ -267,10 +369,14 @@ static bool trans_fsgnjx_h(DisasContext *ctx, arg_fsgnjx_h *a)
static bool trans_fmin_h(DisasContext *ctx, arg_fmin_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
- gen_helper_fmin_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2]);
+ gen_helper_fmin_h(dest, cpu_env, src1, src2);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -278,10 +384,14 @@ static bool trans_fmin_h(DisasContext *ctx, arg_fmin_h *a)
static bool trans_fmax_h(DisasContext *ctx, arg_fmax_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
- gen_helper_fmax_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
- cpu_fpr[a->rs2]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
+
+ gen_helper_fmax_h(dest, cpu_env, src1, src2);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
}
@@ -289,10 +399,14 @@ static bool trans_fmax_h(DisasContext *ctx, arg_fmax_h *a)
static bool trans_fcvt_s_h(DisasContext *ctx, arg_fcvt_s_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH_OR_ZFHMIN(ctx);
+ REQUIRE_ZFH_OR_ZFHMIN_OR_ZHINX_OR_ZHINXMIN(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_s_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_s_h(dest, cpu_env, src1);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
@@ -302,26 +416,32 @@ static bool trans_fcvt_s_h(DisasContext *ctx, arg_fcvt_s_h *a)
static bool trans_fcvt_d_h(DisasContext *ctx, arg_fcvt_d_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH_OR_ZFHMIN(ctx);
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZFH_OR_ZFHMIN_OR_ZHINX_OR_ZHINXMIN(ctx);
+ REQUIRE_ZDINX_OR_D(ctx);
+
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_d_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_d_h(dest, cpu_env, src1);
+ gen_set_fpr_d(ctx, a->rd, dest);
mark_fs_dirty(ctx);
-
return true;
}
static bool trans_fcvt_h_s(DisasContext *ctx, arg_fcvt_h_s *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH_OR_ZFHMIN(ctx);
+ REQUIRE_ZFH_OR_ZFHMIN_OR_ZHINX_OR_ZHINXMIN(ctx);
- gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_h_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_h_s(dest, cpu_env, src1);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
@@ -330,12 +450,15 @@ static bool trans_fcvt_h_s(DisasContext *ctx, arg_fcvt_h_s *a)
static bool trans_fcvt_h_d(DisasContext *ctx, arg_fcvt_h_d *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH_OR_ZFHMIN(ctx);
- REQUIRE_EXT(ctx, RVD);
+ REQUIRE_ZFH_OR_ZFHMIN_OR_ZHINX_OR_ZHINXMIN(ctx);
+ REQUIRE_ZDINX_OR_D(ctx);
- gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_h_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_d(ctx, a->rs1);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_h_d(dest, cpu_env, src1);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
@@ -344,11 +467,13 @@ static bool trans_fcvt_h_d(DisasContext *ctx, arg_fcvt_h_d *a)
static bool trans_feq_h(DisasContext *ctx, arg_feq_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
- gen_helper_feq_h(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_feq_h(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -356,11 +481,13 @@ static bool trans_feq_h(DisasContext *ctx, arg_feq_h *a)
static bool trans_flt_h(DisasContext *ctx, arg_flt_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
- gen_helper_flt_h(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_flt_h(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
@@ -369,11 +496,13 @@ static bool trans_flt_h(DisasContext *ctx, arg_flt_h *a)
static bool trans_fle_h(DisasContext *ctx, arg_fle_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
+ TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2);
- gen_helper_fle_h(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_helper_fle_h(dest, cpu_env, src1, src2);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -381,11 +510,12 @@ static bool trans_fle_h(DisasContext *ctx, arg_fle_h *a)
static bool trans_fclass_h(DisasContext *ctx, arg_fclass_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
- gen_helper_fclass_h(dest, cpu_fpr[a->rs1]);
+ gen_helper_fclass_h(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -393,12 +523,13 @@ static bool trans_fclass_h(DisasContext *ctx, arg_fclass_h *a)
static bool trans_fcvt_w_h(DisasContext *ctx, arg_fcvt_w_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_w_h(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_w_h(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -406,12 +537,13 @@ static bool trans_fcvt_w_h(DisasContext *ctx, arg_fcvt_w_h *a)
static bool trans_fcvt_wu_h(DisasContext *ctx, arg_fcvt_wu_h *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_wu_h(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_wu_h(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -419,12 +551,14 @@ static bool trans_fcvt_wu_h(DisasContext *ctx, arg_fcvt_wu_h *a)
static bool trans_fcvt_h_w(DisasContext *ctx, arg_fcvt_h_w *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv t0 = get_gpr(ctx, a->rs1, EXT_SIGN);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_h_w(cpu_fpr[a->rd], cpu_env, t0);
+ gen_helper_fcvt_h_w(dest, cpu_env, t0);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
@@ -433,12 +567,14 @@ static bool trans_fcvt_h_w(DisasContext *ctx, arg_fcvt_h_w *a)
static bool trans_fcvt_h_wu(DisasContext *ctx, arg_fcvt_h_wu *a)
{
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv t0 = get_gpr(ctx, a->rs1, EXT_SIGN);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_h_wu(cpu_fpr[a->rd], cpu_env, t0);
+ gen_helper_fcvt_h_wu(dest, cpu_env, t0);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
@@ -482,12 +618,13 @@ static bool trans_fcvt_l_h(DisasContext *ctx, arg_fcvt_l_h *a)
{
REQUIRE_64BIT(ctx);
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_l_h(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_l_h(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -496,12 +633,13 @@ static bool trans_fcvt_lu_h(DisasContext *ctx, arg_fcvt_lu_h *a)
{
REQUIRE_64BIT(ctx);
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
TCGv dest = dest_gpr(ctx, a->rd);
+ TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_lu_h(dest, cpu_env, cpu_fpr[a->rs1]);
+ gen_helper_fcvt_lu_h(dest, cpu_env, src1);
gen_set_gpr(ctx, a->rd, dest);
return true;
}
@@ -510,12 +648,14 @@ static bool trans_fcvt_h_l(DisasContext *ctx, arg_fcvt_h_l *a)
{
REQUIRE_64BIT(ctx);
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv t0 = get_gpr(ctx, a->rs1, EXT_SIGN);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_h_l(cpu_fpr[a->rd], cpu_env, t0);
+ gen_helper_fcvt_h_l(dest, cpu_env, t0);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
@@ -525,12 +665,14 @@ static bool trans_fcvt_h_lu(DisasContext *ctx, arg_fcvt_h_lu *a)
{
REQUIRE_64BIT(ctx);
REQUIRE_FPU;
- REQUIRE_ZFH(ctx);
+ REQUIRE_ZHINX_OR_ZFH(ctx);
+ TCGv_i64 dest = dest_fpr(ctx, a->rd);
TCGv t0 = get_gpr(ctx, a->rs1, EXT_SIGN);
gen_set_rm(ctx, a->rm);
- gen_helper_fcvt_h_lu(cpu_fpr[a->rd], cpu_env, t0);
+ gen_helper_fcvt_h_lu(dest, cpu_env, t0);
+ gen_set_fpr_hs(ctx, a->rd, dest);
mark_fs_dirty(ctx);
return true;
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 065e8162a2..dbb322bfa7 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -46,13 +46,23 @@ enum {
RISCV_FRM_ROD = 8, /* Round to Odd */
};
-static inline uint64_t nanbox_s(float32 f)
+static inline uint64_t nanbox_s(CPURISCVState *env, float32 f)
{
- return f | MAKE_64BIT_MASK(32, 32);
+ /* the value is sign-extended instead of NaN-boxed for zfinx */
+ if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) {
+ return (int32_t)f;
+ } else {
+ return f | MAKE_64BIT_MASK(32, 32);
+ }
}
-static inline float32 check_nanbox_s(uint64_t f)
+static inline float32 check_nanbox_s(CPURISCVState *env, uint64_t f)
{
+ /* Skip the NaN-boxing check when zfinx is enabled */
+ if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) {
+ return (uint32_t)f;
+ }
+
uint64_t mask = MAKE_64BIT_MASK(32, 32);
if (likely((f & mask) == mask)) {
@@ -62,13 +72,23 @@ static inline float32 check_nanbox_s(uint64_t f)
}
}
-static inline uint64_t nanbox_h(float16 f)
+static inline uint64_t nanbox_h(CPURISCVState *env, float16 f)
{
- return f | MAKE_64BIT_MASK(16, 48);
+ /* the value is sign-extended instead of NaN-boxed for zfinx */
+ if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) {
+ return (int16_t)f;
+ } else {
+ return f | MAKE_64BIT_MASK(16, 48);
+ }
}
-static inline float16 check_nanbox_h(uint64_t f)
+static inline float16 check_nanbox_h(CPURISCVState *env, uint64_t f)
{
+ /* Skip the NaN-boxing check when zfinx is enabled */
+ if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) {
+ return (uint16_t)f;
+ }
+
uint64_t mask = MAKE_64BIT_MASK(16, 48);
if (likely((f & mask) == mask)) {
diff --git a/target/riscv/pmp.h b/target/riscv/pmp.h
index a9a0b363a7..fcb6b7c467 100644
--- a/target/riscv/pmp.h
+++ b/target/riscv/pmp.h
@@ -22,6 +22,8 @@
#ifndef RISCV_PMP_H
#define RISCV_PMP_H
+#include "cpu.h"
+
typedef enum {
PMP_READ = 1 << 0,
PMP_WRITE = 1 << 1,
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 84dbfa6340..fac998a6b5 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -101,6 +101,9 @@ typedef struct DisasContext {
TCGv zero;
/* Space for 3 operands plus 1 extra for address computation. */
TCGv temp[4];
+ /* Space for 4 floating-point operands (1 dest and <= 3 src) */
+ TCGv_i64 ftemp[4];
+ uint8_t nftemp;
/* PointerMasking extension */
bool pm_mask_enabled;
bool pm_base_enabled;
@@ -380,6 +383,138 @@ static void gen_set_gpr128(DisasContext *ctx, int reg_num, TCGv rl, TCGv rh)
}
}
+static TCGv_i64 ftemp_new(DisasContext *ctx)
+{
+ assert(ctx->nftemp < ARRAY_SIZE(ctx->ftemp));
+ return ctx->ftemp[ctx->nftemp++] = tcg_temp_new_i64();
+}
+
+static TCGv_i64 get_fpr_hs(DisasContext *ctx, int reg_num)
+{
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ return cpu_fpr[reg_num];
+ }
+
+ if (reg_num == 0) {
+ return tcg_constant_i64(0);
+ }
+ switch (get_xl(ctx)) {
+ case MXL_RV32:
+#ifdef TARGET_RISCV32
+ {
+ TCGv_i64 t = ftemp_new(ctx);
+ tcg_gen_ext_i32_i64(t, cpu_gpr[reg_num]);
+ return t;
+ }
+#else
+ /* fall through */
+ case MXL_RV64:
+ return cpu_gpr[reg_num];
+#endif
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static TCGv_i64 get_fpr_d(DisasContext *ctx, int reg_num)
+{
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ return cpu_fpr[reg_num];
+ }
+
+ if (reg_num == 0) {
+ return tcg_constant_i64(0);
+ }
+ switch (get_xl(ctx)) {
+ case MXL_RV32:
+ {
+ TCGv_i64 t = ftemp_new(ctx);
+ tcg_gen_concat_tl_i64(t, cpu_gpr[reg_num], cpu_gpr[reg_num + 1]);
+ return t;
+ }
+#ifdef TARGET_RISCV64
+ case MXL_RV64:
+ return cpu_gpr[reg_num];
+#endif
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static TCGv_i64 dest_fpr(DisasContext *ctx, int reg_num)
+{
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ return cpu_fpr[reg_num];
+ }
+
+ if (reg_num == 0) {
+ return ftemp_new(ctx);
+ }
+
+ switch (get_xl(ctx)) {
+ case MXL_RV32:
+ return ftemp_new(ctx);
+#ifdef TARGET_RISCV64
+ case MXL_RV64:
+ return cpu_gpr[reg_num];
+#endif
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/* assumes t is NaN-boxed (without zfinx) or sign-extended (with zfinx) */
+static void gen_set_fpr_hs(DisasContext *ctx, int reg_num, TCGv_i64 t)
+{
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ tcg_gen_mov_i64(cpu_fpr[reg_num], t);
+ return;
+ }
+ if (reg_num != 0) {
+ switch (get_xl(ctx)) {
+ case MXL_RV32:
+#ifdef TARGET_RISCV32
+ tcg_gen_extrl_i64_i32(cpu_gpr[reg_num], t);
+ break;
+#else
+ /* fall through */
+ case MXL_RV64:
+ tcg_gen_mov_i64(cpu_gpr[reg_num], t);
+ break;
+#endif
+ default:
+ g_assert_not_reached();
+ }
+ }
+}
+
+static void gen_set_fpr_d(DisasContext *ctx, int reg_num, TCGv_i64 t)
+{
+ if (!ctx->cfg_ptr->ext_zfinx) {
+ tcg_gen_mov_i64(cpu_fpr[reg_num], t);
+ return;
+ }
+
+ if (reg_num != 0) {
+ switch (get_xl(ctx)) {
+ case MXL_RV32:
+#ifdef TARGET_RISCV32
+ tcg_gen_extr_i64_i32(cpu_gpr[reg_num], cpu_gpr[reg_num + 1], t);
+ break;
+#else
+ tcg_gen_ext32s_i64(cpu_gpr[reg_num], t);
+ tcg_gen_sari_i64(cpu_gpr[reg_num + 1], t, 32);
+ break;
+ case MXL_RV64:
+ tcg_gen_mov_i64(cpu_gpr[reg_num], t);
+ break;
+#endif
+ default:
+ g_assert_not_reached();
+ }
+ }
+}
+
static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
{
target_ulong next_pc;
@@ -426,6 +561,10 @@ static void mark_fs_dirty(DisasContext *ctx)
{
TCGv tmp;
+ if (!has_ext(ctx, RVF)) {
+ return;
+ }
+
if (ctx->mstatus_fs != MSTATUS_FS) {
/* Remember the state change for the rest of the TB. */
ctx->mstatus_fs = MSTATUS_FS;
@@ -951,6 +1090,8 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
ctx->cs = cs;
ctx->ntemp = 0;
memset(ctx->temp, 0, sizeof(ctx->temp));
+ ctx->nftemp = 0;
+ memset(ctx->ftemp, 0, sizeof(ctx->ftemp));
ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED);
ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED);
ctx->zero = tcg_constant_tl(0);
@@ -972,16 +1113,22 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
DisasContext *ctx = container_of(dcbase, DisasContext, base);
CPURISCVState *env = cpu->env_ptr;
uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next);
+ int i;
ctx->ol = ctx->xl;
decode_opc(env, ctx, opcode16);
ctx->base.pc_next = ctx->pc_succ_insn;
- for (int i = ctx->ntemp - 1; i >= 0; --i) {
+ for (i = ctx->ntemp - 1; i >= 0; --i) {
tcg_temp_free(ctx->temp[i]);
ctx->temp[i] = NULL;
}
ctx->ntemp = 0;
+ for (i = ctx->nftemp - 1; i >= 0; --i) {
+ tcg_temp_free_i64(ctx->ftemp[i]);
+ ctx->ftemp[i] = NULL;
+ }
+ ctx->nftemp = 0;
if (ctx->base.is_jmp == DISAS_NEXT) {
target_ulong page_start;
diff --git a/target/rx/cpu-qom.h b/target/rx/cpu-qom.h
index 7310558e0c..4533759d96 100644
--- a/target/rx/cpu-qom.h
+++ b/target/rx/cpu-qom.h
@@ -26,8 +26,7 @@
#define TYPE_RX62N_CPU RX_CPU_TYPE_NAME("rx62n")
-OBJECT_DECLARE_TYPE(RXCPU, RXCPUClass,
- RX_CPU)
+OBJECT_DECLARE_CPU_TYPE(RXCPU, RXCPUClass, RX_CPU)
/*
* RXCPUClass:
@@ -45,6 +44,4 @@ struct RXCPUClass {
DeviceReset parent_reset;
};
-#define CPUArchState struct CPURXState
-
#endif
diff --git a/target/rx/cpu.h b/target/rx/cpu.h
index 58adf9edf6..b4abd90ccd 100644
--- a/target/rx/cpu.h
+++ b/target/rx/cpu.h
@@ -65,7 +65,7 @@ enum {
NUM_REGS = 16,
};
-typedef struct CPURXState {
+typedef struct CPUArchState {
/* CPU registers */
uint32_t regs[NUM_REGS]; /* general registers */
uint32_t psw_o; /* O bit of status register */
@@ -105,7 +105,7 @@ typedef struct CPURXState {
*
* A RX CPU
*/
-struct RXCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -114,8 +114,6 @@ struct RXCPU {
CPURXState env;
};
-typedef RXCPU ArchCPU;
-
#define RX_CPU_TYPE_SUFFIX "-" TYPE_RX_CPU
#define RX_CPU_TYPE_NAME(model) model RX_CPU_TYPE_SUFFIX
#define CPU_RESOLVING_TYPE TYPE_RX_CPU
diff --git a/target/s390x/cpu-qom.h b/target/s390x/cpu-qom.h
index 9f3a0d86c5..00cae2b131 100644
--- a/target/s390x/cpu-qom.h
+++ b/target/s390x/cpu-qom.h
@@ -25,12 +25,13 @@
#define TYPE_S390_CPU "s390x-cpu"
-OBJECT_DECLARE_TYPE(S390CPU, S390CPUClass,
- S390_CPU)
+OBJECT_DECLARE_CPU_TYPE(S390CPU, S390CPUClass, S390_CPU)
typedef struct S390CPUModel S390CPUModel;
typedef struct S390CPUDef S390CPUDef;
+typedef struct CPUArchState CPUS390XState;
+
typedef enum cpu_reset_type {
S390_CPU_RESET_NORMAL,
S390_CPU_RESET_INITIAL,
@@ -63,6 +64,4 @@ struct S390CPUClass {
void (*reset)(CPUState *cpu, cpu_reset_type type);
};
-typedef struct CPUS390XState CPUS390XState;
-
#endif
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index a75e559134..c49c8466e7 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -51,7 +51,7 @@ typedef struct PSW {
uint64_t addr;
} PSW;
-struct CPUS390XState {
+struct CPUArchState {
uint64_t regs[16]; /* GP registers */
/*
* The floating point registers are part of the vector registers.
@@ -163,7 +163,7 @@ static inline uint64_t *get_freg(CPUS390XState *cs, int nr)
*
* An S/390 CPU.
*/
-struct S390CPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -840,9 +840,6 @@ uint64_t s390_cpu_get_psw_mask(CPUS390XState *env);
/* outside of target/s390x/ */
S390CPU *s390_cpu_addr2state(uint16_t cpu_addr);
-typedef CPUS390XState CPUArchState;
-typedef S390CPU ArchCPU;
-
#include "exec/cpu-all.h"
#endif
diff --git a/target/sh4/cpu-qom.h b/target/sh4/cpu-qom.h
index 8903b4b9c7..d4192d1090 100644
--- a/target/sh4/cpu-qom.h
+++ b/target/sh4/cpu-qom.h
@@ -29,8 +29,7 @@
#define TYPE_SH7751R_CPU SUPERH_CPU_TYPE_NAME("sh7751r")
#define TYPE_SH7785_CPU SUPERH_CPU_TYPE_NAME("sh7785")
-OBJECT_DECLARE_TYPE(SuperHCPU, SuperHCPUClass,
- SUPERH_CPU)
+OBJECT_DECLARE_CPU_TYPE(SuperHCPU, SuperHCPUClass, SUPERH_CPU)
/**
* SuperHCPUClass:
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index fb9dd9db2f..c72a30edfd 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -130,7 +130,7 @@ typedef struct memory_content {
struct memory_content *next;
} memory_content;
-typedef struct CPUSH4State {
+typedef struct CPUArchState {
uint32_t flags; /* general execution flags */
uint32_t gregs[24]; /* general registers */
float32 fregs[32]; /* floating point registers */
@@ -195,7 +195,7 @@ typedef struct CPUSH4State {
*
* A SuperH CPU.
*/
-struct SuperHCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -264,9 +264,6 @@ static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
}
}
-typedef CPUSH4State CPUArchState;
-typedef SuperHCPU ArchCPU;
-
#include "exec/cpu-all.h"
/* MMU control register */
diff --git a/target/sparc/cpu-qom.h b/target/sparc/cpu-qom.h
index f33949aaee..86ed37d933 100644
--- a/target/sparc/cpu-qom.h
+++ b/target/sparc/cpu-qom.h
@@ -29,8 +29,7 @@
#define TYPE_SPARC_CPU "sparc-cpu"
#endif
-OBJECT_DECLARE_TYPE(SPARCCPU, SPARCCPUClass,
- SPARC_CPU)
+OBJECT_DECLARE_CPU_TYPE(SPARCCPU, SPARCCPUClass, SPARC_CPU)
typedef struct sparc_def_t sparc_def_t;
/**
diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h
index 5a7f1ed5d6..abb38db674 100644
--- a/target/sparc/cpu.h
+++ b/target/sparc/cpu.h
@@ -420,7 +420,7 @@ struct CPUTimer
typedef struct CPUTimer CPUTimer;
-typedef struct CPUSPARCState CPUSPARCState;
+typedef struct CPUArchState CPUSPARCState;
#if defined(TARGET_SPARC64)
typedef union {
uint64_t mmuregs[16];
@@ -439,7 +439,7 @@ typedef union {
};
} SparcV9MMU;
#endif
-struct CPUSPARCState {
+struct CPUArchState {
target_ulong gregs[8]; /* general registers */
target_ulong *regwptr; /* pointer to current register window */
target_ulong pc; /* program counter */
@@ -556,7 +556,7 @@ struct CPUSPARCState {
*
* A SPARC CPU.
*/
-struct SPARCCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -743,9 +743,6 @@ static inline int cpu_pil_allowed(CPUSPARCState *env1, int pil)
#endif
}
-typedef CPUSPARCState CPUArchState;
-typedef SPARCCPU ArchCPU;
-
#include "exec/cpu-all.h"
#ifdef TARGET_SPARC64
diff --git a/target/tricore/cpu-qom.h b/target/tricore/cpu-qom.h
index 59bfd01bbc..ee24e9fa76 100644
--- a/target/tricore/cpu-qom.h
+++ b/target/tricore/cpu-qom.h
@@ -24,8 +24,7 @@
#define TYPE_TRICORE_CPU "tricore-cpu"
-OBJECT_DECLARE_TYPE(TriCoreCPU, TriCoreCPUClass,
- TRICORE_CPU)
+OBJECT_DECLARE_CPU_TYPE(TriCoreCPU, TriCoreCPUClass, TRICORE_CPU)
struct TriCoreCPUClass {
/*< private >*/
diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h
index c461387e71..108d6b8288 100644
--- a/target/tricore/cpu.h
+++ b/target/tricore/cpu.h
@@ -28,8 +28,7 @@ struct tricore_boot_info;
typedef struct tricore_def_t tricore_def_t;
-typedef struct CPUTriCoreState CPUTriCoreState;
-struct CPUTriCoreState {
+typedef struct CPUArchState {
/* GPR Register */
uint32_t gpr_a[16];
uint32_t gpr_d[16];
@@ -189,7 +188,7 @@ struct CPUTriCoreState {
const tricore_def_t *cpu_model;
void *irq[8];
struct QEMUTimer *timer; /* Internal timer */
-};
+} CPUTriCoreState;
/**
* TriCoreCPU:
@@ -197,7 +196,7 @@ struct CPUTriCoreState {
*
* A TriCore CPU.
*/
-struct TriCoreCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -369,9 +368,6 @@ static inline int cpu_mmu_index(CPUTriCoreState *env, bool ifetch)
return 0;
}
-typedef CPUTriCoreState CPUArchState;
-typedef TriCoreCPU ArchCPU;
-
#include "exec/cpu-all.h"
void cpu_state_reset(CPUTriCoreState *s);
diff --git a/target/xtensa/cpu-qom.h b/target/xtensa/cpu-qom.h
index 41d9859673..4fc35ee49b 100644
--- a/target/xtensa/cpu-qom.h
+++ b/target/xtensa/cpu-qom.h
@@ -34,8 +34,7 @@
#define TYPE_XTENSA_CPU "xtensa-cpu"
-OBJECT_DECLARE_TYPE(XtensaCPU, XtensaCPUClass,
- XTENSA_CPU)
+OBJECT_DECLARE_CPU_TYPE(XtensaCPU, XtensaCPUClass, XTENSA_CPU)
typedef struct XtensaConfig XtensaConfig;
diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
index 02143f2f77..4515f682aa 100644
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -306,7 +306,7 @@ typedef enum {
INTTYPE_MAX
} interrupt_type;
-struct CPUXtensaState;
+typedef struct CPUArchState CPUXtensaState;
typedef struct xtensa_tlb_entry {
uint32_t vaddr;
@@ -344,7 +344,7 @@ typedef struct XtensaGdbRegmap {
} XtensaGdbRegmap;
typedef struct XtensaCcompareTimer {
- struct CPUXtensaState *env;
+ CPUXtensaState *env;
QEMUTimer *timer;
} XtensaCcompareTimer;
@@ -506,7 +506,7 @@ enum {
};
#endif
-typedef struct CPUXtensaState {
+struct CPUArchState {
const XtensaConfig *config;
uint32_t regs[16];
uint32_t pc;
@@ -545,7 +545,7 @@ typedef struct CPUXtensaState {
/* Watchpoints for DBREAK registers */
struct CPUWatchpoint *cpu_watchpoint[MAX_NDBREAK];
-} CPUXtensaState;
+};
/**
* XtensaCPU:
@@ -553,7 +553,7 @@ typedef struct CPUXtensaState {
*
* An Xtensa CPU.
*/
-struct XtensaCPU {
+struct ArchCPU {
/*< private >*/
CPUState parent_obj;
/*< public >*/
@@ -722,9 +722,6 @@ static inline int cpu_mmu_index(CPUXtensaState *env, bool ifetch)
#define XTENSA_CSBASE_LBEG_OFF_MASK 0x00ff0000
#define XTENSA_CSBASE_LBEG_OFF_SHIFT 16
-typedef CPUXtensaState CPUArchState;
-typedef XtensaCPU ArchCPU;
-
#include "exec/cpu-all.h"
static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc,
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 876af589ce..485f685bd2 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -131,6 +131,9 @@ typedef enum {
#define TCG_TARGET_HAS_andc_vec 1
#define TCG_TARGET_HAS_orc_vec 1
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 0
+#define TCG_TARGET_HAS_eqv_vec 0
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 1
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 27c27a1f14..7e96495392 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -130,6 +130,9 @@ extern bool use_neon_instructions;
#define TCG_TARGET_HAS_andc_vec 1
#define TCG_TARGET_HAS_orc_vec 1
+#define TCG_TARGET_HAS_nand_vec 0
+#define TCG_TARGET_HAS_nor_vec 0
+#define TCG_TARGET_HAS_eqv_vec 0
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 1
diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
index 78774d1005..91ceb0e1da 100644
--- a/tcg/i386/tcg-target-con-set.h
+++ b/tcg/i386/tcg-target-con-set.h
@@ -45,6 +45,7 @@ C_O1_I2(r, r, rI)
C_O1_I2(x, x, x)
C_N1_I2(r, r, r)
C_N1_I2(r, r, rW)
+C_O1_I3(x, 0, x, x)
C_O1_I3(x, x, x, x)
C_O1_I4(r, r, re, r, 0)
C_O1_I4(r, r, r, ri, ri)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index faa15eecab..b5c6159853 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -171,6 +171,10 @@ bool have_bmi1;
bool have_popcnt;
bool have_avx1;
bool have_avx2;
+bool have_avx512bw;
+bool have_avx512dq;
+bool have_avx512vbmi2;
+bool have_avx512vl;
bool have_movbe;
#ifdef CONFIG_CPUID_H
@@ -258,6 +262,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define P_SIMDF3 0x20000 /* 0xf3 opcode prefix */
#define P_SIMDF2 0x40000 /* 0xf2 opcode prefix */
#define P_VEXL 0x80000 /* Set VEX.L = 1 */
+#define P_EVEX 0x100000 /* Requires EVEX encoding */
#define OPC_ARITH_EvIz (0x81)
#define OPC_ARITH_EvIb (0x83)
@@ -308,6 +313,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_PABSB (0x1c | P_EXT38 | P_DATA16)
#define OPC_PABSW (0x1d | P_EXT38 | P_DATA16)
#define OPC_PABSD (0x1e | P_EXT38 | P_DATA16)
+#define OPC_VPABSQ (0x1f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16)
#define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16)
#define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16)
@@ -334,15 +340,19 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)
+#define OPC_VPMAXSQ (0x3d | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PMAXUB (0xde | P_EXT | P_DATA16)
#define OPC_PMAXUW (0x3e | P_EXT38 | P_DATA16)
#define OPC_PMAXUD (0x3f | P_EXT38 | P_DATA16)
+#define OPC_VPMAXUQ (0x3f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PMINSB (0x38 | P_EXT38 | P_DATA16)
#define OPC_PMINSW (0xea | P_EXT | P_DATA16)
#define OPC_PMINSD (0x39 | P_EXT38 | P_DATA16)
+#define OPC_VPMINSQ (0x39 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PMINUB (0xda | P_EXT | P_DATA16)
#define OPC_PMINUW (0x3a | P_EXT38 | P_DATA16)
#define OPC_PMINUD (0x3b | P_EXT38 | P_DATA16)
+#define OPC_VPMINUQ (0x3b | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PMOVSXBW (0x20 | P_EXT38 | P_DATA16)
#define OPC_PMOVSXWD (0x23 | P_EXT38 | P_DATA16)
#define OPC_PMOVSXDQ (0x25 | P_EXT38 | P_DATA16)
@@ -351,19 +361,21 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_PMOVZXDQ (0x35 | P_EXT38 | P_DATA16)
#define OPC_PMULLW (0xd5 | P_EXT | P_DATA16)
#define OPC_PMULLD (0x40 | P_EXT38 | P_DATA16)
+#define OPC_VPMULLQ (0x40 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_POR (0xeb | P_EXT | P_DATA16)
#define OPC_PSHUFB (0x00 | P_EXT38 | P_DATA16)
#define OPC_PSHUFD (0x70 | P_EXT | P_DATA16)
#define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2)
#define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3)
#define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */
-#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */
+#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */
#define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */
#define OPC_PSLLW (0xf1 | P_EXT | P_DATA16)
#define OPC_PSLLD (0xf2 | P_EXT | P_DATA16)
#define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16)
#define OPC_PSRAW (0xe1 | P_EXT | P_DATA16)
#define OPC_PSRAD (0xe2 | P_EXT | P_DATA16)
+#define OPC_VPSRAQ (0x72 | P_EXT | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_PSRLW (0xd1 | P_EXT | P_DATA16)
#define OPC_PSRLD (0xd2 | P_EXT | P_DATA16)
#define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16)
@@ -414,11 +426,29 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
+#define OPC_VPROLVD (0x15 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPROLVQ (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPRORVD (0x14 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPRORVQ (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHLDW (0x70 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHLDD (0x71 | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPSHLDQ (0x71 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHLDVW (0x70 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHLDVD (0x71 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPSHLDVQ (0x71 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHRDVW (0x72 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSHRDVD (0x73 | P_EXT38 | P_DATA16 | P_EVEX)
+#define OPC_VPSHRDVQ (0x73 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
+#define OPC_VPSRAVW (0x11 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16)
+#define OPC_VPSRAVQ (0x46 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPSRLVW (0x10 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
+#define OPC_VPTERNLOGQ (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VZEROUPPER (0x77 | P_EXT)
#define OPC_XCHG_ax_r32 (0x90)
@@ -622,9 +652,57 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
tcg_out8(s, opc);
}
+static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
+ int rm, int index)
+{
+ /* The entire 4-byte evex prefix; with R' and V' set. */
+ uint32_t p = 0x08041062;
+ int mm, pp;
+
+ tcg_debug_assert(have_avx512vl);
+
+ /* EVEX.mm */
+ if (opc & P_EXT3A) {
+ mm = 3;
+ } else if (opc & P_EXT38) {
+ mm = 2;
+ } else if (opc & P_EXT) {
+ mm = 1;
+ } else {
+ g_assert_not_reached();
+ }
+
+ /* EVEX.pp */
+ if (opc & P_DATA16) {
+ pp = 1; /* 0x66 */
+ } else if (opc & P_SIMDF3) {
+ pp = 2; /* 0xf3 */
+ } else if (opc & P_SIMDF2) {
+ pp = 3; /* 0xf2 */
+ } else {
+ pp = 0;
+ }
+
+ p = deposit32(p, 8, 2, mm);
+ p = deposit32(p, 13, 1, (rm & 8) == 0); /* EVEX.RXB.B */
+ p = deposit32(p, 14, 1, (index & 8) == 0); /* EVEX.RXB.X */
+ p = deposit32(p, 15, 1, (r & 8) == 0); /* EVEX.RXB.R */
+ p = deposit32(p, 16, 2, pp);
+ p = deposit32(p, 19, 4, ~v);
+ p = deposit32(p, 23, 1, (opc & P_VEXW) != 0);
+ p = deposit32(p, 29, 2, (opc & P_VEXL) != 0);
+
+ tcg_out32(s, p);
+ tcg_out8(s, opc);
+}
+
static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
- tcg_out_vex_opc(s, opc, r, v, rm, 0);
+ if (opc & P_EVEX) {
+ tcg_out_evex_opc(s, opc, r, v, rm, 0);
+ } else {
+ tcg_out_vex_opc(s, opc, r, v, rm, 0);
+ }
tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
@@ -2746,7 +2824,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2
};
static int const mul_insn[4] = {
- OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_UD2
+ OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_VPMULLQ
};
static int const shift_imm_insn[4] = {
OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
@@ -2770,28 +2848,31 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2
};
static int const smin_insn[4] = {
- OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_UD2
+ OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_VPMINSQ
};
static int const smax_insn[4] = {
- OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_UD2
+ OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ
};
static int const umin_insn[4] = {
- OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_UD2
+ OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ
};
static int const umax_insn[4] = {
- OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
+ OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ
+ };
+ static int const rotlv_insn[4] = {
+ OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ
+ };
+ static int const rotrv_insn[4] = {
+ OPC_UD2, OPC_UD2, OPC_VPRORVD, OPC_VPRORVQ
};
static int const shlv_insn[4] = {
- /* TODO: AVX512 adds support for MO_16. */
- OPC_UD2, OPC_UD2, OPC_VPSLLVD, OPC_VPSLLVQ
+ OPC_UD2, OPC_VPSLLVW, OPC_VPSLLVD, OPC_VPSLLVQ
};
static int const shrv_insn[4] = {
- /* TODO: AVX512 adds support for MO_16. */
- OPC_UD2, OPC_UD2, OPC_VPSRLVD, OPC_VPSRLVQ
+ OPC_UD2, OPC_VPSRLVW, OPC_VPSRLVD, OPC_VPSRLVQ
};
static int const sarv_insn[4] = {
- /* TODO: AVX512 adds support for MO_16, MO_64. */
- OPC_UD2, OPC_UD2, OPC_VPSRAVD, OPC_UD2
+ OPC_UD2, OPC_VPSRAVW, OPC_VPSRAVD, OPC_VPSRAVQ
};
static int const shls_insn[4] = {
OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ
@@ -2800,16 +2881,24 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ
};
static int const sars_insn[4] = {
- OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_UD2
+ OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_VPSRAQ
+ };
+ static int const vpshldi_insn[4] = {
+ OPC_UD2, OPC_VPSHLDW, OPC_VPSHLDD, OPC_VPSHLDQ
+ };
+ static int const vpshldv_insn[4] = {
+ OPC_UD2, OPC_VPSHLDVW, OPC_VPSHLDVD, OPC_VPSHLDVQ
+ };
+ static int const vpshrdv_insn[4] = {
+ OPC_UD2, OPC_VPSHRDVW, OPC_VPSHRDVD, OPC_VPSHRDVQ
};
static int const abs_insn[4] = {
- /* TODO: AVX512 adds support for MO_64. */
- OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2
+ OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_VPABSQ
};
TCGType type = vecl + TCG_TYPE_V64;
int insn, sub;
- TCGArg a0, a1, a2;
+ TCGArg a0, a1, a2, a3;
a0 = args[0];
a1 = args[1];
@@ -2867,6 +2956,12 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_sarv_vec:
insn = sarv_insn[vece];
goto gen_simd;
+ case INDEX_op_rotlv_vec:
+ insn = rotlv_insn[vece];
+ goto gen_simd;
+ case INDEX_op_rotrv_vec:
+ insn = rotrv_insn[vece];
+ goto gen_simd;
case INDEX_op_shls_vec:
insn = shls_insn[vece];
goto gen_simd;
@@ -2888,6 +2983,16 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_x86_packus_vec:
insn = packus_insn[vece];
goto gen_simd;
+ case INDEX_op_x86_vpshldv_vec:
+ insn = vpshldv_insn[vece];
+ a1 = a2;
+ a2 = args[3];
+ goto gen_simd;
+ case INDEX_op_x86_vpshrdv_vec:
+ insn = vpshrdv_insn[vece];
+ a1 = a2;
+ a2 = args[3];
+ goto gen_simd;
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_dup2_vec:
/* First merge the two 32-bit inputs to a single 64-bit element. */
@@ -2931,17 +3036,30 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_shli_vec:
+ insn = shift_imm_insn[vece];
sub = 6;
goto gen_shift;
case INDEX_op_shri_vec:
+ insn = shift_imm_insn[vece];
sub = 2;
goto gen_shift;
case INDEX_op_sari_vec:
- tcg_debug_assert(vece != MO_64);
+ if (vece == MO_64) {
+ insn = OPC_PSHIFTD_Ib | P_VEXW | P_EVEX;
+ } else {
+ insn = shift_imm_insn[vece];
+ }
sub = 4;
+ goto gen_shift;
+ case INDEX_op_rotli_vec:
+ insn = OPC_PSHIFTD_Ib | P_EVEX; /* VPROL[DQ] */
+ if (vece == MO_64) {
+ insn |= P_VEXW;
+ }
+ sub = 1;
+ goto gen_shift;
gen_shift:
tcg_debug_assert(vece != MO_8);
- insn = shift_imm_insn[vece];
if (type == TCG_TYPE_V256) {
insn |= P_VEXL;
}
@@ -2977,7 +3095,51 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
insn = OPC_VPERM2I128;
sub = args[3];
goto gen_simd_imm8;
+ case INDEX_op_x86_vpshldi_vec:
+ insn = vpshldi_insn[vece];
+ sub = args[3];
+ goto gen_simd_imm8;
+
+ case INDEX_op_not_vec:
+ insn = OPC_VPTERNLOGQ;
+ a2 = a1;
+ sub = 0x33; /* !B */
+ goto gen_simd_imm8;
+ case INDEX_op_nor_vec:
+ insn = OPC_VPTERNLOGQ;
+ sub = 0x11; /* norCB */
+ goto gen_simd_imm8;
+ case INDEX_op_nand_vec:
+ insn = OPC_VPTERNLOGQ;
+ sub = 0x77; /* nandCB */
+ goto gen_simd_imm8;
+ case INDEX_op_eqv_vec:
+ insn = OPC_VPTERNLOGQ;
+ sub = 0x99; /* xnorCB */
+ goto gen_simd_imm8;
+ case INDEX_op_orc_vec:
+ insn = OPC_VPTERNLOGQ;
+ sub = 0xdd; /* orB!C */
+ goto gen_simd_imm8;
+
+ case INDEX_op_bitsel_vec:
+ insn = OPC_VPTERNLOGQ;
+ a3 = args[3];
+ if (a0 == a1) {
+ a1 = a2;
+ a2 = a3;
+ sub = 0xca; /* A?B:C */
+ } else if (a0 == a2) {
+ a2 = a3;
+ sub = 0xe2; /* B?A:C */
+ } else {
+ tcg_out_mov(s, type, a0, a3);
+ sub = 0xb8; /* B?C:A */
+ }
+ goto gen_simd_imm8;
+
gen_simd_imm8:
+ tcg_debug_assert(insn != OPC_UD2);
if (type == TCG_TYPE_V256) {
insn |= P_VEXL;
}
@@ -3196,6 +3358,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_or_vec:
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_nand_vec:
+ case INDEX_op_nor_vec:
+ case INDEX_op_eqv_vec:
case INDEX_op_ssadd_vec:
case INDEX_op_usadd_vec:
case INDEX_op_sssub_vec:
@@ -3207,10 +3373,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
case INDEX_op_sarv_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
case INDEX_op_shls_vec:
case INDEX_op_shrs_vec:
case INDEX_op_sars_vec:
- case INDEX_op_rotls_vec:
case INDEX_op_cmp_vec:
case INDEX_op_x86_shufps_vec:
case INDEX_op_x86_blend_vec:
@@ -3219,6 +3386,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_x86_vperm2i128_vec:
case INDEX_op_x86_punpckl_vec:
case INDEX_op_x86_punpckh_vec:
+ case INDEX_op_x86_vpshldi_vec:
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_dup2_vec:
#endif
@@ -3226,12 +3394,19 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_abs_vec:
case INDEX_op_dup_vec:
+ case INDEX_op_not_vec:
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
case INDEX_op_sari_vec:
+ case INDEX_op_rotli_vec:
case INDEX_op_x86_psrldq_vec:
return C_O1_I1(x, x);
+ case INDEX_op_x86_vpshldv_vec:
+ case INDEX_op_x86_vpshrdv_vec:
+ return C_O1_I3(x, 0, x, x);
+
+ case INDEX_op_bitsel_vec:
case INDEX_op_x86_vpblendvb_vec:
return C_O1_I3(x, x, x, x);
@@ -3249,53 +3424,96 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_or_vec:
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_nand_vec:
+ case INDEX_op_nor_vec:
+ case INDEX_op_eqv_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_bitsel_vec:
return 1;
- case INDEX_op_rotli_vec:
case INDEX_op_cmp_vec:
case INDEX_op_cmpsel_vec:
return -1;
+ case INDEX_op_rotli_vec:
+ return have_avx512vl && vece >= MO_32 ? 1 : -1;
+
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
/* We must expand the operation for MO_8. */
return vece == MO_8 ? -1 : 1;
case INDEX_op_sari_vec:
- /* We must expand the operation for MO_8. */
- if (vece == MO_8) {
+ switch (vece) {
+ case MO_8:
return -1;
- }
- /* We can emulate this for MO_64, but it does not pay off
- unless we're producing at least 4 values. */
- if (vece == MO_64) {
+ case MO_16:
+ case MO_32:
+ return 1;
+ case MO_64:
+ if (have_avx512vl) {
+ return 1;
+ }
+ /*
+ * We can emulate this for MO_64, but it does not pay off
+ * unless we're producing at least 4 values.
+ */
return type >= TCG_TYPE_V256 ? -1 : 0;
}
- return 1;
+ return 0;
case INDEX_op_shls_vec:
case INDEX_op_shrs_vec:
return vece >= MO_16;
case INDEX_op_sars_vec:
- return vece >= MO_16 && vece <= MO_32;
+ switch (vece) {
+ case MO_16:
+ case MO_32:
+ return 1;
+ case MO_64:
+ return have_avx512vl;
+ }
+ return 0;
case INDEX_op_rotls_vec:
return vece >= MO_16 ? -1 : 0;
case INDEX_op_shlv_vec:
case INDEX_op_shrv_vec:
- return have_avx2 && vece >= MO_32;
+ switch (vece) {
+ case MO_16:
+ return have_avx512bw;
+ case MO_32:
+ case MO_64:
+ return have_avx2;
+ }
+ return 0;
case INDEX_op_sarv_vec:
- return have_avx2 && vece == MO_32;
+ switch (vece) {
+ case MO_16:
+ return have_avx512bw;
+ case MO_32:
+ return have_avx2;
+ case MO_64:
+ return have_avx512vl;
+ }
+ return 0;
case INDEX_op_rotlv_vec:
case INDEX_op_rotrv_vec:
- return have_avx2 && vece >= MO_32 ? -1 : 0;
+ switch (vece) {
+ case MO_16:
+ return have_avx512vbmi2 ? -1 : 0;
+ case MO_32:
+ case MO_64:
+ return have_avx512vl ? 1 : have_avx2 ? -1 : 0;
+ }
+ return 0;
case INDEX_op_mul_vec:
- if (vece == MO_8) {
- /* We can expand the operation for MO_8. */
+ switch (vece) {
+ case MO_8:
return -1;
- }
- if (vece == MO_64) {
- return 0;
+ case MO_64:
+ return have_avx512dq;
}
return 1;
@@ -3309,7 +3527,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_umin_vec:
case INDEX_op_umax_vec:
case INDEX_op_abs_vec:
- return vece <= MO_32;
+ return vece <= MO_32 || have_avx512vl;
default:
return 0;
@@ -3427,6 +3645,12 @@ static void expand_vec_rotli(TCGType type, unsigned vece,
return;
}
+ if (have_avx512vbmi2) {
+ vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece,
+ tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm);
+ return;
+ }
+
t = tcg_temp_new_vec(type);
tcg_gen_shli_vec(vece, t, v1, imm);
tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
@@ -3434,31 +3658,19 @@ static void expand_vec_rotli(TCGType type, unsigned vece,
tcg_temp_free_vec(t);
}
-static void expand_vec_rotls(TCGType type, unsigned vece,
- TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
+static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec sh, bool right)
{
- TCGv_i32 rsh;
TCGv_vec t;
- tcg_debug_assert(vece != MO_8);
+ if (have_avx512vbmi2) {
+ vec_gen_4(right ? INDEX_op_x86_vpshrdv_vec : INDEX_op_x86_vpshldv_vec,
+ type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(sh));
+ return;
+ }
t = tcg_temp_new_vec(type);
- rsh = tcg_temp_new_i32();
-
- tcg_gen_neg_i32(rsh, lsh);
- tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
- tcg_gen_shls_vec(vece, t, v1, lsh);
- tcg_gen_shrs_vec(vece, v0, v1, rsh);
- tcg_gen_or_vec(vece, v0, v0, t);
- tcg_temp_free_vec(t);
- tcg_temp_free_i32(rsh);
-}
-
-static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
- TCGv_vec v1, TCGv_vec sh, bool right)
-{
- TCGv_vec t = tcg_temp_new_vec(type);
-
tcg_gen_dupi_vec(vece, t, 8 << vece);
tcg_gen_sub_vec(vece, t, t, sh);
if (right) {
@@ -3472,6 +3684,35 @@ static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
tcg_temp_free_vec(t);
}
+static void expand_vec_rotls(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
+{
+ TCGv_vec t = tcg_temp_new_vec(type);
+
+ tcg_debug_assert(vece != MO_8);
+
+ if (vece >= MO_32 ? have_avx512vl : have_avx512vbmi2) {
+ tcg_gen_dup_i32_vec(vece, t, lsh);
+ if (vece >= MO_32) {
+ tcg_gen_rotlv_vec(vece, v0, v1, t);
+ } else {
+ expand_vec_rotv(type, vece, v0, v1, t, false);
+ }
+ } else {
+ TCGv_i32 rsh = tcg_temp_new_i32();
+
+ tcg_gen_neg_i32(rsh, lsh);
+ tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
+ tcg_gen_shls_vec(vece, t, v1, lsh);
+ tcg_gen_shrs_vec(vece, v0, v1, rsh);
+ tcg_gen_or_vec(vece, v0, v0, t);
+
+ tcg_temp_free_i32(rsh);
+ }
+
+ tcg_temp_free_vec(t);
+}
+
static void expand_vec_mul(TCGType type, unsigned vece,
TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
{
@@ -3567,28 +3808,28 @@ static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
fixup = NEED_SWAP | NEED_INV;
break;
case TCG_COND_LEU:
- if (vece <= MO_32) {
+ if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
fixup = NEED_UMIN;
} else {
fixup = NEED_BIAS | NEED_INV;
}
break;
case TCG_COND_GTU:
- if (vece <= MO_32) {
+ if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
fixup = NEED_UMIN | NEED_INV;
} else {
fixup = NEED_BIAS;
}
break;
case TCG_COND_GEU:
- if (vece <= MO_32) {
+ if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
fixup = NEED_UMAX;
} else {
fixup = NEED_BIAS | NEED_SWAP | NEED_INV;
}
break;
case TCG_COND_LTU:
- if (vece <= MO_32) {
+ if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
fixup = NEED_UMAX | NEED_INV;
} else {
fixup = NEED_BIAS | NEED_SWAP;
@@ -3839,12 +4080,12 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
static void tcg_target_init(TCGContext *s)
{
#ifdef CONFIG_CPUID_H
- unsigned a, b, c, d, b7 = 0;
+ unsigned a, b, c, d, b7 = 0, c7 = 0;
unsigned max = __get_cpuid_max(0, 0);
if (max >= 7) {
/* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
- __cpuid_count(7, 0, a, b7, c, d);
+ __cpuid_count(7, 0, a, b7, c7, d);
have_bmi1 = (b7 & bit_BMI) != 0;
have_bmi2 = (b7 & bit_BMI2) != 0;
}
@@ -3874,6 +4115,22 @@ static void tcg_target_init(TCGContext *s)
if ((xcrl & 6) == 6) {
have_avx1 = (c & bit_AVX) != 0;
have_avx2 = (b7 & bit_AVX2) != 0;
+
+ /*
+ * There are interesting instructions in AVX512, so long
+ * as we have AVX512VL, which indicates support for EVEX
+ * on sizes smaller than 512 bits. We are required to
+ * check that OPMASK and all extended ZMM state are enabled
+ * even if we're not using them -- the insns will fault.
+ */
+ if ((xcrl & 0xe0) == 0xe0
+ && (b7 & bit_AVX512F)
+ && (b7 & bit_AVX512VL)) {
+ have_avx512vl = true;
+ have_avx512bw = (b7 & bit_AVX512BW) != 0;
+ have_avx512dq = (b7 & bit_AVX512DQ) != 0;
+ have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
+ }
}
}
}
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 3b2c9437a0..00fcbe297d 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -103,6 +103,10 @@ extern bool have_bmi1;
extern bool have_popcnt;
extern bool have_avx1;
extern bool have_avx2;
+extern bool have_avx512bw;
+extern bool have_avx512dq;
+extern bool have_avx512vbmi2;
+extern bool have_avx512vl;
extern bool have_movbe;
/* optional instructions */
@@ -184,20 +188,23 @@ extern bool have_movbe;
#define TCG_TARGET_HAS_v256 have_avx2
#define TCG_TARGET_HAS_andc_vec 1
-#define TCG_TARGET_HAS_orc_vec 0
-#define TCG_TARGET_HAS_not_vec 0
+#define TCG_TARGET_HAS_orc_vec have_avx512vl
+#define TCG_TARGET_HAS_nand_vec have_avx512vl
+#define TCG_TARGET_HAS_nor_vec have_avx512vl
+#define TCG_TARGET_HAS_eqv_vec have_avx512vl
+#define TCG_TARGET_HAS_not_vec have_avx512vl
#define TCG_TARGET_HAS_neg_vec 0
#define TCG_TARGET_HAS_abs_vec 1
-#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_roti_vec have_avx512vl
#define TCG_TARGET_HAS_rots_vec 0
-#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_rotv_vec have_avx512vl
#define TCG_TARGET_HAS_shi_vec 1
#define TCG_TARGET_HAS_shs_vec 1
#define TCG_TARGET_HAS_shv_vec have_avx2
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 1
#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 0
+#define TCG_TARGET_HAS_bitsel_vec have_avx512vl
#define TCG_TARGET_HAS_cmpsel_vec -1
#define TCG_TARGET_deposit_i32_valid(ofs, len) \
diff --git a/tcg/i386/tcg-target.opc.h b/tcg/i386/tcg-target.opc.h
index 1312941800..b5f403e35e 100644
--- a/tcg/i386/tcg-target.opc.h
+++ b/tcg/i386/tcg-target.opc.h
@@ -33,3 +33,6 @@ DEF(x86_psrldq_vec, 1, 1, 1, IMPLVEC)
DEF(x86_vperm2i128_vec, 1, 2, 1, IMPLVEC)
DEF(x86_punpckl_vec, 1, 2, 0, IMPLVEC)
DEF(x86_punpckh_vec, 1, 2, 0, IMPLVEC)
+DEF(x86_vpshldi_vec, 1, 2, 1, IMPLVEC)
+DEF(x86_vpshldv_vec, 1, 3, 0, IMPLVEC)
+DEF(x86_vpshrdv_vec, 1, 3, 0, IMPLVEC)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index e573000951..ae081ab29c 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -359,13 +359,13 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
CASE_OP_32_64_VEC(orc):
return x | ~y;
- CASE_OP_32_64(eqv):
+ CASE_OP_32_64_VEC(eqv):
return ~(x ^ y);
- CASE_OP_32_64(nand):
+ CASE_OP_32_64_VEC(nand):
return ~(x & y);
- CASE_OP_32_64(nor):
+ CASE_OP_32_64_VEC(nor):
return ~(x | y);
case INDEX_op_clz_i32:
@@ -552,10 +552,10 @@ static bool do_constant_folding_cond_eq(TCGCond c)
static int do_constant_folding_cond(TCGType type, TCGArg x,
TCGArg y, TCGCond c)
{
- uint64_t xv = arg_info(x)->val;
- uint64_t yv = arg_info(y)->val;
-
if (arg_is_const(x) && arg_is_const(y)) {
+ uint64_t xv = arg_info(x)->val;
+ uint64_t yv = arg_info(y)->val;
+
switch (type) {
case TCG_TYPE_I32:
return do_constant_folding_cond_32(xv, yv, c);
@@ -567,7 +567,7 @@ static int do_constant_folding_cond(TCGType type, TCGArg x,
}
} else if (args_are_copies(x, y)) {
return do_constant_folding_cond_eq(c);
- } else if (arg_is_const(y) && yv == 0) {
+ } else if (arg_is_const(y) && arg_info(y)->val == 0) {
switch (c) {
case TCG_COND_LTU:
return 0;
@@ -2119,7 +2119,7 @@ void tcg_optimize(TCGContext *s)
case INDEX_op_dup2_vec:
done = fold_dup2(&ctx, op);
break;
- CASE_OP_32_64(eqv):
+ CASE_OP_32_64_VEC(eqv):
done = fold_eqv(&ctx, op);
break;
CASE_OP_32_64(extract):
@@ -2170,13 +2170,13 @@ void tcg_optimize(TCGContext *s)
CASE_OP_32_64(mulu2):
done = fold_multiply2(&ctx, op);
break;
- CASE_OP_32_64(nand):
+ CASE_OP_32_64_VEC(nand):
done = fold_nand(&ctx, op);
break;
CASE_OP_32_64(neg):
done = fold_neg(&ctx, op);
break;
- CASE_OP_32_64(nor):
+ CASE_OP_32_64_VEC(nor):
done = fold_nor(&ctx, op);
break;
CASE_OP_32_64_VEC(not):
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 69d22e08cb..1f3c5c171c 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -3122,6 +3122,9 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
case INDEX_op_not_vec:
+ case INDEX_op_nor_vec:
+ case INDEX_op_eqv_vec:
+ case INDEX_op_nand_vec:
return 1;
case INDEX_op_orc_vec:
return have_isa_2_07;
@@ -3400,6 +3403,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_orc_vec:
insn = VORC;
break;
+ case INDEX_op_nand_vec:
+ insn = VNAND;
+ break;
+ case INDEX_op_nor_vec:
+ insn = VNOR;
+ break;
+ case INDEX_op_eqv_vec:
+ insn = VEQV;
+ break;
case INDEX_op_cmp_vec:
switch (args[3]) {
@@ -3787,6 +3799,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
case INDEX_op_orc_vec:
+ case INDEX_op_nor_vec:
+ case INDEX_op_eqv_vec:
+ case INDEX_op_nand_vec:
case INDEX_op_cmp_vec:
case INDEX_op_ssadd_vec:
case INDEX_op_sssub_vec:
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index c775c97b61..e6cf72503f 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -162,6 +162,9 @@ extern bool have_vsx;
#define TCG_TARGET_HAS_andc_vec 1
#define TCG_TARGET_HAS_orc_vec have_isa_2_07
+#define TCG_TARGET_HAS_nand_vec have_isa_2_07
+#define TCG_TARGET_HAS_nor_vec 1
+#define TCG_TARGET_HAS_eqv_vec have_isa_2_07
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
#define TCG_TARGET_HAS_abs_vec 0
diff --git a/tcg/region.c b/tcg/region.c
index 72afb35738..97ca5291d5 100644
--- a/tcg/region.c
+++ b/tcg/region.c
@@ -26,6 +26,7 @@
#include "qemu/units.h"
#include "qemu/madvise.h"
#include "qemu/mprotect.h"
+#include "qemu/memalign.h"
#include "qemu/cacheinfo.h"
#include "qapi/error.h"
#include "exec/exec-all.h"
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index d56c1e51e4..6e65828c09 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -290,7 +290,9 @@ typedef enum S390Opcode {
VRRc_VMXL = 0xe7fd,
VRRc_VN = 0xe768,
VRRc_VNC = 0xe769,
+ VRRc_VNN = 0xe76e,
VRRc_VNO = 0xe76b,
+ VRRc_VNX = 0xe76c,
VRRc_VO = 0xe76a,
VRRc_VOC = 0xe76f,
VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
@@ -2805,6 +2807,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_xor_vec:
tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
break;
+ case INDEX_op_nand_vec:
+ tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
+ break;
+ case INDEX_op_nor_vec:
+ tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
+ break;
+ case INDEX_op_eqv_vec:
+ tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
+ break;
case INDEX_op_shli_vec:
tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
@@ -2901,7 +2912,10 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_and_vec:
case INDEX_op_andc_vec:
case INDEX_op_bitsel_vec:
+ case INDEX_op_eqv_vec:
+ case INDEX_op_nand_vec:
case INDEX_op_neg_vec:
+ case INDEX_op_nor_vec:
case INDEX_op_not_vec:
case INDEX_op_or_vec:
case INDEX_op_orc_vec:
@@ -3246,6 +3260,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_or_vec:
case INDEX_op_orc_vec:
case INDEX_op_xor_vec:
+ case INDEX_op_nand_vec:
+ case INDEX_op_nor_vec:
+ case INDEX_op_eqv_vec:
case INDEX_op_cmp_vec:
case INDEX_op_mul_vec:
case INDEX_op_rotlv_vec:
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 69217d995b..23e2063667 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -145,6 +145,9 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_andc_vec 1
#define TCG_TARGET_HAS_orc_vec HAVE_FACILITY(VECTOR_ENH1)
+#define TCG_TARGET_HAS_nand_vec HAVE_FACILITY(VECTOR_ENH1)
+#define TCG_TARGET_HAS_nor_vec 1
+#define TCG_TARGET_HAS_eqv_vec HAVE_FACILITY(VECTOR_ENH1)
#define TCG_TARGET_HAS_not_vec 1
#define TCG_TARGET_HAS_neg_vec 1
#define TCG_TARGET_HAS_abs_vec 1
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index faf30f9cdd..463dabf515 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -371,23 +371,32 @@ void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
- /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend supports it. */
- tcg_gen_and_vec(0, r, a, b);
- tcg_gen_not_vec(0, r, r);
+ if (TCG_TARGET_HAS_nand_vec) {
+ vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b);
+ } else {
+ tcg_gen_and_vec(0, r, a, b);
+ tcg_gen_not_vec(0, r, r);
+ }
}
void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
- /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend supports it. */
- tcg_gen_or_vec(0, r, a, b);
- tcg_gen_not_vec(0, r, r);
+ if (TCG_TARGET_HAS_nor_vec) {
+ vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b);
+ } else {
+ tcg_gen_or_vec(0, r, a, b);
+ tcg_gen_not_vec(0, r, r);
+ }
}
void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
- /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend supports it. */
- tcg_gen_xor_vec(0, r, a, b);
- tcg_gen_not_vec(0, r, r);
+ if (TCG_TARGET_HAS_eqv_vec) {
+ vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b);
+ } else {
+ tcg_gen_xor_vec(0, r, a, b);
+ tcg_gen_not_vec(0, r, r);
+ }
}
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 528277d1d3..33a97eabdb 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1407,6 +1407,12 @@ bool tcg_op_supported(TCGOpcode op)
return have_vec && TCG_TARGET_HAS_andc_vec;
case INDEX_op_orc_vec:
return have_vec && TCG_TARGET_HAS_orc_vec;
+ case INDEX_op_nand_vec:
+ return have_vec && TCG_TARGET_HAS_nand_vec;
+ case INDEX_op_nor_vec:
+ return have_vec && TCG_TARGET_HAS_nor_vec;
+ case INDEX_op_eqv_vec:
+ return have_vec && TCG_TARGET_HAS_eqv_vec;
case INDEX_op_mul_vec:
return have_vec && TCG_TARGET_HAS_mul_vec;
case INDEX_op_shli_vec:
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 9ff1fa0832..98337c567a 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -197,7 +197,7 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_R0,
};
-#if MAX_OPC_PARAM_IARGS != 6
+#if MAX_OPC_PARAM_IARGS != 7
# error Fix needed, number of supported input arguments changed!
#endif
diff --git a/tests/avocado/avocado_qemu/__init__.py b/tests/avocado/avocado_qemu/__init__.py
index 75063c0c30..9b056b5ce5 100644
--- a/tests/avocado/avocado_qemu/__init__.py
+++ b/tests/avocado/avocado_qemu/__init__.py
@@ -603,6 +603,8 @@ class LinuxTest(LinuxSSHMixIn, QemuSystemTest):
try:
cloudinit_iso = os.path.join(self.workdir, 'cloudinit.iso')
self.phone_home_port = network.find_free_port()
+ if not self.phone_home_port:
+ self.cancel('Failed to get a free port')
pubkey_content = None
if ssh_pubkey:
with open(ssh_pubkey) as pubkey:
diff --git a/tests/avocado/boot_linux.py b/tests/avocado/boot_linux.py
index ab19146d1e..ee584d2fdf 100644
--- a/tests/avocado/boot_linux.py
+++ b/tests/avocado/boot_linux.py
@@ -79,6 +79,7 @@ class BootLinuxAarch64(LinuxTest):
"""
self.require_accelerator("tcg")
self.vm.add_args("-accel", "tcg")
+ self.vm.add_args("-cpu", "max,lpa2=off")
self.vm.add_args("-machine", "virt,gic-version=2")
self.add_common_args()
self.launch_and_wait(set_up_ssh_connection=False)
@@ -91,6 +92,7 @@ class BootLinuxAarch64(LinuxTest):
"""
self.require_accelerator("tcg")
self.vm.add_args("-accel", "tcg")
+ self.vm.add_args("-cpu", "max,lpa2=off")
self.vm.add_args("-machine", "virt,gic-version=3")
self.add_common_args()
self.launch_and_wait(set_up_ssh_connection=False)
diff --git a/tests/bench/atomic_add-bench.c b/tests/bench/atomic_add-bench.c
index f05471ab45..8a6faad6ec 100644
--- a/tests/bench/atomic_add-bench.c
+++ b/tests/bench/atomic_add-bench.c
@@ -2,6 +2,7 @@
#include "qemu/thread.h"
#include "qemu/host-utils.h"
#include "qemu/processor.h"
+#include "qemu/memalign.h"
struct thread_info {
uint64_t r;
diff --git a/tests/bench/qht-bench.c b/tests/bench/qht-bench.c
index 2e5b70ccd0..8afe161d10 100644
--- a/tests/bench/qht-bench.c
+++ b/tests/bench/qht-bench.c
@@ -10,6 +10,7 @@
#include "qemu/qht.h"
#include "qemu/rcu.h"
#include "qemu/xxhash.h"
+#include "qemu/memalign.h"
struct thread_stats {
size_t rd;
diff --git a/tests/check-block.sh b/tests/check-block.sh
index 18f7433901..f59496396c 100755
--- a/tests/check-block.sh
+++ b/tests/check-block.sh
@@ -48,18 +48,6 @@ if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then
skip "bash version too old ==> Not running the qemu-iotests."
fi
-if ! (sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then
- if ! command -v gsed >/dev/null 2>&1; then
- skip "GNU sed not available ==> Not running the qemu-iotests."
- fi
-else
- # Double-check that we're not using BusyBox' sed which says
- # that "This is not GNU sed version 4.0" ...
- if sed --version | grep -q 'not GNU sed' ; then
- skip "BusyBox sed not supported ==> Not running the qemu-iotests."
- fi
-fi
-
cd tests/qemu-iotests
# QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index 6af5ab9e76..0e1cfd7e49 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -744,6 +744,7 @@ class TestCommitWithFilters(iotests.QMPTestCase):
pattern_file)
self.assertFalse('Pattern verification failed' in result)
+ @iotests.skip_if_unsupported(['throttle'])
def setUp(self):
qemu_img('create', '-f', iotests.imgfmt, self.img0, '64M')
qemu_img('create', '-f', iotests.imgfmt, self.img1, '64M')
diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185
index f2ec5c5ceb..8b1143dc16 100755
--- a/tests/qemu-iotests/185
+++ b/tests/qemu-iotests/185
@@ -33,6 +33,12 @@ _cleanup()
_rm_test_img "${TEST_IMG}.copy"
_cleanup_test_img
_cleanup_qemu
+
+ if [ -f "$TEST_DIR/qsd.pid" ]; then
+ kill -SIGKILL "$(cat "$TEST_DIR/qsd.pid")"
+ rm -f "$TEST_DIR/qsd.pid"
+ fi
+ rm -f "$SOCK_DIR/qsd.sock"
}
trap "_cleanup; exit \$status" 0 1 2 3 15
@@ -45,7 +51,7 @@ _supported_fmt qcow2
_supported_proto file
_supported_os Linux
-size=64M
+size=$((64 * 1048576))
TEST_IMG="${TEST_IMG}.base" _make_test_img $size
echo
@@ -216,6 +222,188 @@ wait=1 _cleanup_qemu | grep -v 'JOB_STATUS_CHANGE'
_check_test_img
+echo
+echo === Start mirror to throttled QSD and exit qemu ===
+echo
+
+# Mirror to a throttled QSD instance (so that qemu cannot drain the
+# throttling), wait for READY, then write some data to the device,
+# and then quit qemu.
+# (qemu should force-cancel the job and not wait for the data to be
+# written to the target.)
+
+_make_test_img $size
+
+# Will be used by this and the next case
+set_up_throttled_qsd() {
+ $QSD \
+ --object throttle-group,id=thrgr,limits.bps-total=1048576 \
+ --blockdev null-co,node-name=null,size=$size \
+ --blockdev throttle,node-name=throttled,throttle-group=thrgr,file=null \
+ --nbd-server addr.type=unix,addr.path="$SOCK_DIR/qsd.sock" \
+ --export nbd,id=exp,node-name=throttled,name=target,writable=true \
+ --pidfile "$TEST_DIR/qsd.pid" \
+ --daemonize
+}
+
+set_up_throttled_qsd
+
+# Need a virtio-blk device so that qemu-io writes will not block the monitor
+_launch_qemu \
+ --blockdev file,node-name=source-proto,filename="$TEST_IMG" \
+ --blockdev qcow2,node-name=source-fmt,file=source-proto \
+ --device virtio-blk,id=vblk,drive=source-fmt \
+ --blockdev "{\"driver\": \"nbd\",
+ \"node-name\": \"target\",
+ \"server\": {
+ \"type\": \"unix\",
+ \"path\": \"$SOCK_DIR/qsd.sock\"
+ },
+ \"export\": \"target\"}"
+
+h=$QEMU_HANDLE
+_send_qemu_cmd $h '{"execute": "qmp_capabilities"}' 'return'
+
+# Use sync=top, so the first pass will not copy the whole image
+_send_qemu_cmd $h \
+ '{"execute": "blockdev-mirror",
+ "arguments": {
+ "job-id": "mirror",
+ "device": "source-fmt",
+ "target": "target",
+ "sync": "top"
+ }}' \
+ 'return' \
+ | grep -v JOB_STATUS_CHANGE # Ignore these events during creation
+
+# This too will be used by this and the next case
+# $1: QEMU handle
+# $2: Image size
+wait_for_job_and_quit() {
+ h=$1
+ size=$2
+
+ # List of expected events
+ capture_events='BLOCK_JOB_READY JOB_STATUS_CHANGE'
+ _wait_event $h 'BLOCK_JOB_READY'
+ QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before READY
+
+ # Write something to the device for post-READY mirroring. Write it in
+ # blocks matching the cluster size, each spaced one block apart, so
+ # that the mirror job will have to spawn one request per cluster.
+ # Because the number of concurrent requests is limited (to 16), this
+ # limits the number of bytes concurrently in flight, which speeds up
+ # cancelling the job (in-flight requests still are waited for).
+ # To limit the number of bytes in flight, we could alternatively pass
+ # something for blockdev-mirror's @buf-size parameter, but
+ # block-commit does not have such a parameter, so we need to figure
+ # something out that works for both.
+
+ cluster_size=65536
+ step=$((cluster_size * 2))
+
+ echo '--- Writing data to the virtio-blk device ---'
+
+ for ofs in $(seq 0 $step $((size - step))); do
+ qemu_io_cmd="qemu-io -d vblk/virtio-backend "
+ qemu_io_cmd+="\\\"aio_write $ofs $cluster_size\\\""
+
+ # Do not include these requests in the reference output
+ # (it's just too much)
+ silent=yes _send_qemu_cmd $h \
+ "{\"execute\": \"human-monitor-command\",
+ \"arguments\": {
+ \"command-line\": \"$qemu_io_cmd\"
+ }}" \
+ 'return'
+ done
+
+ # Wait until the job's length is updated to reflect the write requests
+
+ # We have written to half of the device, so this is the expected job length
+ final_len=$((size / 2))
+ timeout=100 # unit: 0.1 seconds
+ while true; do
+ len=$(
+ _send_qemu_cmd $h \
+ '{"execute": "query-block-jobs"}' \
+ 'return.*"len": [0-9]\+' \
+ | grep 'return.*"len": [0-9]\+' \
+ | sed -e 's/.*"len": \([0-9]\+\).*/\1/'
+ )
+ if [ "$len" -eq "$final_len" ]; then
+ break
+ fi
+ timeout=$((timeout - 1))
+ if [ "$timeout" -eq 0 ]; then
+ echo "ERROR: Timeout waiting for job to reach len=$final_len"
+ break
+ fi
+ sleep 0.1
+ done
+
+ sleep 1
+
+ _send_qemu_cmd $h \
+ '{"execute": "quit"}' \
+ 'return'
+
+ # List of expected events
+ capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN'
+ _wait_event $h 'SHUTDOWN'
+ QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before SHUTDOWN
+ _wait_event $h 'JOB_STATUS_CHANGE' # standby
+ _wait_event $h 'JOB_STATUS_CHANGE' # ready
+ _wait_event $h 'JOB_STATUS_CHANGE' # aborting
+ # Filter the offset (depends on when exactly `quit` was issued)
+ _wait_event $h 'BLOCK_JOB_CANCELLED' \
+ | sed -e 's/"offset": [0-9]\+/"offset": (filtered)/'
+ _wait_event $h 'JOB_STATUS_CHANGE' # concluded
+ _wait_event $h 'JOB_STATUS_CHANGE' # null
+
+ wait=yes _cleanup_qemu
+
+ kill -SIGTERM "$(cat "$TEST_DIR/qsd.pid")"
+}
+
+wait_for_job_and_quit $h $size
+
+echo
+echo === Start active commit to throttled QSD and exit qemu ===
+echo
+
+# Same as the above, but instead of mirroring, do an active commit
+
+_make_test_img $size
+
+set_up_throttled_qsd
+
+_launch_qemu \
+ --blockdev "{\"driver\": \"nbd\",
+ \"node-name\": \"target\",
+ \"server\": {
+ \"type\": \"unix\",
+ \"path\": \"$SOCK_DIR/qsd.sock\"
+ },
+ \"export\": \"target\"}" \
+ --blockdev file,node-name=source-proto,filename="$TEST_IMG" \
+ --blockdev qcow2,node-name=source-fmt,file=source-proto,backing=target \
+ --device virtio-blk,id=vblk,drive=source-fmt
+
+h=$QEMU_HANDLE
+_send_qemu_cmd $h '{"execute": "qmp_capabilities"}' 'return'
+
+_send_qemu_cmd $h \
+ '{"execute": "block-commit",
+ "arguments": {
+ "job-id": "commit",
+ "device": "source-fmt"
+ }}' \
+ 'return' \
+ | grep -v JOB_STATUS_CHANGE # Ignore these events during creation
+
+wait_for_job_and_quit $h $size
+
# success, all done
echo "*** done"
rm -f $seq.full
diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out
index 754a641258..70e8dd6c87 100644
--- a/tests/qemu-iotests/185.out
+++ b/tests/qemu-iotests/185.out
@@ -116,4 +116,52 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "stream"}}
No errors were found on the image.
+
+=== Start mirror to throttled QSD and exit qemu ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+{"execute": "qmp_capabilities"}
+{"return": {}}
+{"execute": "blockdev-mirror",
+ "arguments": {
+ "job-id": "mirror",
+ "device": "source-fmt",
+ "target": "target",
+ "sync": "top"
+ }}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "mirror", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
+--- Writing data to the virtio-blk device ---
+{"execute": "quit"}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "mirror"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "mirror"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "mirror"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "mirror", "len": 33554432, "offset": (filtered), "speed": 0, "type": "mirror"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "mirror"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "mirror"}}
+
+=== Start active commit to throttled QSD and exit qemu ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+{"execute": "qmp_capabilities"}
+{"return": {}}
+{"execute": "block-commit",
+ "arguments": {
+ "job-id": "commit",
+ "device": "source-fmt"
+ }}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "commit", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
+--- Writing data to the virtio-blk device ---
+{"execute": "quit"}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "commit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "commit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "commit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "commit", "len": 33554432, "offset": (filtered), "speed": 0, "type": "commit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "commit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "commit"}}
*** done
diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out
index 50cbd8e882..aa76131ca9 100644
--- a/tests/qemu-iotests/257.out
+++ b/tests/qemu-iotests/257.out
@@ -106,6 +106,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -566,6 +582,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -819,6 +851,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -1279,6 +1327,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -1532,6 +1596,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -1992,6 +2072,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -2245,6 +2341,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -2705,6 +2817,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -2958,6 +3086,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -3418,6 +3562,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -3671,6 +3831,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -4131,6 +4307,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -4384,6 +4576,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
@@ -4844,6 +5052,22 @@ write -P0x67 0x3fe0000 0x20000
{"return": ""}
{
"bitmaps": {
+ "backup-top": [
+ {
+ "busy": false,
+ "count": 67108864,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ },
+ {
+ "busy": false,
+ "count": 458752,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+ }
+ ],
"drive0": [
{
"busy": false,
diff --git a/tests/qemu-iotests/271 b/tests/qemu-iotests/271
index 2775b4d130..c7c2cadda0 100755
--- a/tests/qemu-iotests/271
+++ b/tests/qemu-iotests/271
@@ -896,7 +896,7 @@ _make_test_img -o extended_l2=on 1M
# Second and third writes in _concurrent_io() are independent and may finish in
# different order. So, filter offset out to match both possible variants.
_concurrent_io | $QEMU_IO | _filter_qemu_io | \
- $SED -e 's/\(20480\|40960\)/OFFSET/'
+ sed -e 's/\(20480\|40960\)/OFFSET/'
_concurrent_verify | $QEMU_IO | _filter_qemu_io
# success, all done
diff --git a/tests/qemu-iotests/296 b/tests/qemu-iotests/296
index 099a3eeaa5..f80ef3434a 100755
--- a/tests/qemu-iotests/296
+++ b/tests/qemu-iotests/296
@@ -174,8 +174,12 @@ class EncryptionSetupTestCase(iotests.QMPTestCase):
}
result = vm.qmp('x-blockdev-amend', **args)
- assert result['return'] == {}
- vm.run_job('job0')
+ iotests.log(result)
+ # Run the job only if it was created
+ event = ('JOB_STATUS_CHANGE',
+ {'data': {'id': 'job0', 'status': 'created'}})
+ if vm.events_wait([event], timeout=0.0) is not None:
+ vm.run_job('job0')
# test that when the image opened by two qemu processes,
# neither of them can update the encryption keys
diff --git a/tests/qemu-iotests/296.out b/tests/qemu-iotests/296.out
index 42205cc981..609826eaa0 100644
--- a/tests/qemu-iotests/296.out
+++ b/tests/qemu-iotests/296.out
@@ -1,11 +1,9 @@
-{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
-Job failed: Failed to get shared "consistent read" lock
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
-Job failed: Failed to get shared "consistent read" lock
-{"execute": "job-dismiss", "arguments": {"id": "job0"}}
+{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}}
+{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}}
{"return": {}}
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
@@ -13,14 +11,9 @@ qemu-img: Failed to get shared "consistent read" lock
Is another process using the image [TEST_DIR/test.img]?
.
-Job failed: Block node is read-only
-{"execute": "job-dismiss", "arguments": {"id": "job0"}}
-{"return": {}}
-Job failed: Failed to get shared "consistent read" lock
-{"execute": "job-dismiss", "arguments": {"id": "job0"}}
-{"return": {}}
-Job failed: Failed to get shared "consistent read" lock
-{"execute": "job-dismiss", "arguments": {"id": "job0"}}
+{"error": {"class": "GenericError", "desc": "Block node is read-only"}}
+{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}}
+{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}}
{"return": {}}
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 75cc241580..21819db9c3 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -21,44 +21,44 @@
_filter_date()
{
- $SED -re 's/[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/yyyy-mm-dd hh:mm:ss/'
+ sed -Ee 's/[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/yyyy-mm-dd hh:mm:ss/'
}
_filter_vmstate_size()
{
- $SED -r -e 's/[0-9. ]{5} [KMGT]iB/ SIZE/' \
- -e 's/[0-9. ]{5} B/ SIZE/'
+ sed -E -e 's/[0-9. ]{5} [KMGT]iB/ SIZE/' \
+ -e 's/[0-9. ]{5} B/ SIZE/'
}
_filter_generated_node_ids()
{
- $SED -re 's/\#block[0-9]{3,}/NODE_NAME/'
+ sed -Ee 's/\#block[0-9]{3,}/NODE_NAME/'
}
_filter_qom_path()
{
- $SED -e '/Attached to:/s/\device[[0-9]\+\]/device[N]/g'
+ gsed -e '/Attached to:/s/\device[[0-9]\+\]/device[N]/g'
}
# replace occurrences of the actual TEST_DIR value with TEST_DIR
_filter_testdir()
{
- $SED -e "s#$TEST_DIR/#TEST_DIR/#g" \
- -e "s#$SOCK_DIR/#SOCK_DIR/#g" \
- -e "s#SOCK_DIR/fuse-#TEST_DIR/#g"
+ sed -e "s#$TEST_DIR/#TEST_DIR/#g" \
+ -e "s#$SOCK_DIR/#SOCK_DIR/#g" \
+ -e "s#SOCK_DIR/fuse-#TEST_DIR/#g"
}
# replace occurrences of the actual IMGFMT value with IMGFMT
_filter_imgfmt()
{
- $SED -e "s#$IMGFMT#IMGFMT#g"
+ sed -e "s#$IMGFMT#IMGFMT#g"
}
# Replace error message when the format is not supported and delete
# the output lines after the first one
_filter_qemu_img_check()
{
- $SED -e '/allocated.*fragmented.*compressed clusters/d' \
+ gsed -e '/allocated.*fragmented.*compressed clusters/d' \
-e 's/qemu-img: This image format does not support checks/No errors were found on the image./' \
-e '/Image end offset: [0-9]\+/d'
}
@@ -66,13 +66,14 @@ _filter_qemu_img_check()
# Removes \r from messages
_filter_win32()
{
- $SED -e 's/\r//g'
+ gsed -e 's/\r//g'
}
# sanitize qemu-io output
_filter_qemu_io()
{
- _filter_win32 | $SED -e "s/[0-9]* ops\; [0-9/:. sec]* ([0-9/.inf]* [EPTGMKiBbytes]*\/sec and [0-9/.inf]* ops\/sec)/X ops\; XX:XX:XX.X (XXX YYY\/sec and XXX ops\/sec)/" \
+ _filter_win32 | \
+ gsed -e "s/[0-9]* ops\; [0-9/:. sec]* ([0-9/.inf]* [EPTGMKiBbytes]*\/sec and [0-9/.inf]* ops\/sec)/X ops\; XX:XX:XX.X (XXX YYY\/sec and XXX ops\/sec)/" \
-e "s/: line [0-9][0-9]*: *[0-9][0-9]*\( Aborted\| Killed\)/:\1/" \
-e "s/qemu-io> //g"
}
@@ -80,7 +81,7 @@ _filter_qemu_io()
# replace occurrences of QEMU_PROG with "qemu"
_filter_qemu()
{
- $SED -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \
+ gsed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \
-e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \
-e $'s#\r##' # QEMU monitor uses \r\n line endings
}
@@ -89,7 +90,7 @@ _filter_qemu()
_filter_qmp()
{
_filter_win32 | \
- $SED -e 's#\("\(micro\)\?seconds": \)[0-9]\+#\1 TIMESTAMP#g' \
+ gsed -e 's#\("\(micro\)\?seconds": \)[0-9]\+#\1 TIMESTAMP#g' \
-e 's#^{"QMP":.*}$#QMP_VERSION#' \
-e '/^ "QMP": {\s*$/, /^ }\s*$/ c\' \
-e ' QMP_VERSION'
@@ -98,32 +99,32 @@ _filter_qmp()
# readline makes HMP command strings so long that git complains
_filter_hmp()
{
- $SED -e $'s/^\\((qemu) \\)\\?.*\e\\[D/\\1/g' \
+ gsed -e $'s/^\\((qemu) \\)\\?.*\e\\[D/\\1/g' \
-e $'s/\e\\[K//g'
}
# replace block job offset
_filter_block_job_offset()
{
- $SED -e 's/, "offset": [0-9]\+,/, "offset": OFFSET,/'
+ sed -e 's/, "offset": [0-9]\+,/, "offset": OFFSET,/'
}
# replace block job len
_filter_block_job_len()
{
- $SED -e 's/, "len": [0-9]\+,/, "len": LEN,/g'
+ sed -e 's/, "len": [0-9]\+,/, "len": LEN,/g'
}
# replace actual image size (depends on the host filesystem)
_filter_actual_image_size()
{
- $SED -s 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
+ gsed -s 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
}
# Filename filters for qemu-img create
_filter_img_create_filenames()
{
- $SED \
+ sed \
-e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \
-e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
-e "s#$TEST_DIR#TEST_DIR#g" \
@@ -141,7 +142,7 @@ _do_filter_img_create()
# precedes ", fmt=") and the options part ($options, which starts
# with "fmt=")
# (And just echo everything before the first "^Formatting")
- readarray formatting_line < <($SED -e 's/, fmt=/\n/')
+ readarray formatting_line < <(gsed -e 's/, fmt=/\n/')
filename_part=${formatting_line[0]}
unset formatting_line[0]
@@ -168,11 +169,11 @@ _do_filter_img_create()
options=$(
echo "$options" \
| tr '\n' '\0' \
- | $SED -e 's/ \([a-z0-9_.-]*\)=/\n\1=/g' \
+ | gsed -e 's/ \([a-z0-9_.-]*\)=/\n\1=/g' \
| grep -a -e '^fmt' -e '^size' -e '^backing' -e '^preallocation' \
-e '^encryption' "${grep_data_file[@]}" \
| _filter_img_create_filenames \
- | $SED \
+ | sed \
-e 's/^\(fmt\)/0-\1/' \
-e 's/^\(size\)/1-\1/' \
-e 's/^\(backing\)/2-\1/' \
@@ -180,9 +181,9 @@ _do_filter_img_create()
-e 's/^\(encryption\)/4-\1/' \
-e 's/^\(preallocation\)/8-\1/' \
| LC_ALL=C sort \
- | $SED -e 's/^[0-9]-//' \
+ | sed -e 's/^[0-9]-//' \
| tr '\n\0' ' \n' \
- | $SED -e 's/^ *$//' -e 's/ *$//'
+ | sed -e 's/^ *$//' -e 's/ *$//'
)
if [ -n "$options" ]; then
@@ -208,7 +209,7 @@ _filter_img_create()
_filter_img_create_size()
{
- $SED -e "s# size=[0-9]\\+# size=SIZE#g"
+ gsed -e "s# size=[0-9]\\+# size=SIZE#g"
}
_filter_img_info()
@@ -222,7 +223,7 @@ _filter_img_info()
discard=0
regex_json_spec_start='^ *"format-specific": \{'
- $SED -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \
+ gsed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \
-e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
-e "s#$TEST_DIR#TEST_DIR#g" \
-e "s#$SOCK_DIR#SOCK_DIR#g" \
@@ -284,7 +285,7 @@ _filter_qemu_img_map()
data_file_filter=(-e "s#$data_file_pattern#\\1#")
fi
- $SED -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \
+ sed -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \
-e 's/"offset": [0-9]\+/"offset": OFFSET/g' \
-e 's/Mapped to *//' \
"${data_file_filter[@]}" \
@@ -298,7 +299,7 @@ _filter_nbd()
# receive callbacks sometimes, making them unreliable.
#
# Filter out the TCP port number since this changes between runs.
- $SED -e '/nbd\/.*\.c:/d' \
+ sed -e '/nbd\/.*\.c:/d' \
-e 's#127\.0\.0\.1:[0-9]*#127.0.0.1:PORT#g' \
-e "s#?socket=$SOCK_DIR#?socket=SOCK_DIR#g" \
-e 's#\(foo\|PORT/\?\|.sock\): Failed to .*$#\1#'
@@ -335,14 +336,14 @@ sys.stdout.write(result)'
_filter_authz_check_tls()
{
- $SED -e 's/TLS x509 authz check for .* is denied/TLS x509 authz check for DISTINGUISHED-NAME is denied/'
+ sed -e 's/TLS x509 authz check for .* is denied/TLS x509 authz check for DISTINGUISHED-NAME is denied/'
}
_filter_qcow2_compression_type_bit()
{
- $SED -e 's/\(incompatible_features\s\+\)\[3\(, \)\?/\1[/' \
- -e 's/\(incompatible_features.*\), 3\]/\1]/' \
- -e 's/\(incompatible_features.*\), 3\(,.*\)/\1\2/'
+ gsed -e 's/\(incompatible_features\s\+\)\[3\(, \)\?/\1[/' \
+ -e 's/\(incompatible_features.*\), 3\]/\1]/' \
+ -e 's/\(incompatible_features.*\), 3\(,.*\)/\1\2/'
}
# make sure this script returns success
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 9885030b43..227e0a5be9 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -17,17 +17,28 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
-SED=
-for sed in sed gsed; do
- ($sed --version | grep 'GNU sed') > /dev/null 2>&1
- if [ "$?" -eq 0 ]; then
- SED=$sed
- break
- fi
-done
-if [ -z "$SED" ]; then
- echo "$0: GNU sed not found"
- exit 1
+# bail out, setting up .notrun file
+_notrun()
+{
+ echo "$*" >"$TEST_DIR/$seq.notrun"
+ echo "$seq not run: $*"
+ status=0
+ exit
+}
+
+if ! command -v gsed >/dev/null 2>&1; then
+ if sed --version 2>&1 | grep -v 'not GNU sed' | grep 'GNU sed' > /dev/null;
+ then
+ gsed()
+ {
+ sed "$@"
+ }
+ else
+ gsed()
+ {
+ _notrun "GNU sed not available"
+ }
+ fi
fi
dd()
@@ -722,30 +733,20 @@ _img_info()
done
}
-# bail out, setting up .notrun file
-#
-_notrun()
-{
- echo "$*" >"$OUTPUT_DIR/$seq.notrun"
- echo "$seq not run: $*"
- status=0
- exit
-}
-
# bail out, setting up .casenotrun file
# The function _casenotrun() is used as a notifier. It is the
# caller's responsibility to make skipped a particular test.
#
_casenotrun()
{
- echo " [case not run] $*" >>"$OUTPUT_DIR/$seq.casenotrun"
+ echo " [case not run] $*" >>"$TEST_DIR/$seq.casenotrun"
}
# just plain bail out
#
_fail()
{
- echo "$*" | tee -a "$OUTPUT_DIR/$seq.full"
+ echo "$*" | tee -a "$TEST_DIR/$seq.full"
echo "(see $seq.full for details)"
status=1
exit 1
@@ -920,7 +921,7 @@ _require_working_luks()
IMGFMT='luks' _rm_test_img "$file"
if [ $status != 0 ]; then
- reason=$(echo "$output" | grep "$file:" | $SED -e "s#.*$file: *##")
+ reason=$(echo "$output" | grep "$file:" | sed -e "s#.*$file: *##")
if [ -z "$reason" ]; then
reason="Failed to create a LUKS image"
fi
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 6ba65eb1ff..508adade9e 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -39,6 +39,7 @@ from contextlib import contextmanager
from qemu.machine import qtest
from qemu.qmp import QMPMessage
+from qemu.aqmp.legacy import QEMUMonitorProtocol
# Use this logger for logging messages directly from the iotests module
logger = logging.getLogger('qemu.iotests')
@@ -84,7 +85,6 @@ qemu_print = os.environ.get('PRINT_QEMU', False)
imgfmt = os.environ.get('IMGFMT', 'raw')
imgproto = os.environ.get('IMGPROTO', 'file')
-output_dir = os.environ.get('OUTPUT_DIR', '.')
try:
test_dir = os.environ['TEST_DIR']
@@ -278,6 +278,9 @@ def qemu_io(*args):
'''Run qemu-io and return the stdout data'''
return qemu_tool_pipe_and_status('qemu-io', qemu_io_wrap_args(args))[0]
+def qemu_io_pipe_and_status(*args):
+ return qemu_tool_pipe_and_status('qemu-io', qemu_io_wrap_args(args))
+
def qemu_io_log(*args):
result = qemu_io(*args)
log(result, filters=[filter_testfiles, filter_qemu_io])
@@ -348,14 +351,30 @@ class QemuIoInteractive:
class QemuStorageDaemon:
- def __init__(self, *args: str, instance_id: str = 'a'):
+ _qmp: Optional[QEMUMonitorProtocol] = None
+ _qmpsock: Optional[str] = None
+ # Python < 3.8 would complain if this type were not a string literal
+ # (importing `annotations` from `__future__` would work; but not on <= 3.6)
+ _p: 'Optional[subprocess.Popen[bytes]]' = None
+
+ def __init__(self, *args: str, instance_id: str = 'a', qmp: bool = False):
assert '--pidfile' not in args
self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid')
all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile]
+ if qmp:
+ self._qmpsock = os.path.join(sock_dir, f'qsd-{instance_id}.sock')
+ all_args += ['--chardev',
+ f'socket,id=qmp-sock,path={self._qmpsock}',
+ '--monitor', 'qmp-sock']
+
+ self._qmp = QEMUMonitorProtocol(self._qmpsock, server=True)
+
# Cannot use with here, we want the subprocess to stay around
# pylint: disable=consider-using-with
self._p = subprocess.Popen(all_args)
+ if self._qmp is not None:
+ self._qmp.accept()
while not os.path.exists(self.pidfile):
if self._p.poll() is not None:
cmd = ' '.join(all_args)
@@ -370,11 +389,24 @@ class QemuStorageDaemon:
assert self._pid == self._p.pid
+ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \
+ -> QMPMessage:
+ assert self._qmp is not None
+ return self._qmp.cmd(cmd, args)
+
def stop(self, kill_signal=15):
self._p.send_signal(kill_signal)
self._p.wait()
self._p = None
+ if self._qmp:
+ self._qmp.close()
+
+ if self._qmpsock is not None:
+ try:
+ os.remove(self._qmpsock)
+ except OSError:
+ pass
try:
os.remove(self.pidfile)
except OSError:
@@ -1209,7 +1241,7 @@ def notrun(reason):
# Each test in qemu-iotests has a number ("seq")
seq = os.path.basename(sys.argv[0])
- with open('%s/%s.notrun' % (output_dir, seq), 'w', encoding='utf-8') \
+ with open('%s/%s.notrun' % (test_dir, seq), 'w', encoding='utf-8') \
as outfile:
outfile.write(reason + '\n')
logger.warning("%s not run: %s", seq, reason)
@@ -1224,7 +1256,7 @@ def case_notrun(reason):
# Each test in qemu-iotests has a number ("seq")
seq = os.path.basename(sys.argv[0])
- with open('%s/%s.casenotrun' % (output_dir, seq), 'a', encoding='utf-8') \
+ with open('%s/%s.casenotrun' % (test_dir, seq), 'a', encoding='utf-8') \
as outfile:
outfile.write(' [case not run] ' + reason + '\n')
diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py
index 0f32897fe8..b11e943c8a 100644
--- a/tests/qemu-iotests/testenv.py
+++ b/tests/qemu-iotests/testenv.py
@@ -66,7 +66,7 @@ class TestEnv(ContextManager['TestEnv']):
# pylint: disable=too-many-instance-attributes
env_variables = ['PYTHONPATH', 'TEST_DIR', 'SOCK_DIR', 'SAMPLE_IMG_DIR',
- 'OUTPUT_DIR', 'PYTHON', 'QEMU_PROG', 'QEMU_IMG_PROG',
+ 'PYTHON', 'QEMU_PROG', 'QEMU_IMG_PROG',
'QEMU_IO_PROG', 'QEMU_NBD_PROG', 'QSD_PROG',
'QEMU_OPTIONS', 'QEMU_IMG_OPTIONS',
'QEMU_IO_OPTIONS', 'QEMU_IO_OPTIONS_NO_FMT',
@@ -106,7 +106,6 @@ class TestEnv(ContextManager['TestEnv']):
TEST_DIR
SOCK_DIR
SAMPLE_IMG_DIR
- OUTPUT_DIR
"""
# Path where qemu goodies live in this source tree.
@@ -134,8 +133,6 @@ class TestEnv(ContextManager['TestEnv']):
os.path.join(self.source_iotests,
'sample_images'))
- self.output_dir = os.getcwd() # OUTPUT_DIR
-
def init_binaries(self) -> None:
"""Init binary path variables:
PYTHON (for bash tests)
diff --git a/tests/qemu-iotests/testrunner.py b/tests/qemu-iotests/testrunner.py
index 9a94273975..41083ff9c6 100644
--- a/tests/qemu-iotests/testrunner.py
+++ b/tests/qemu-iotests/testrunner.py
@@ -259,9 +259,6 @@ class TestRunner(ContextManager['TestRunner']):
"""
f_test = Path(test)
- f_bad = Path(f_test.name + '.out.bad')
- f_notrun = Path(f_test.name + '.notrun')
- f_casenotrun = Path(f_test.name + '.casenotrun')
f_reference = Path(self.find_reference(test))
if not f_test.exists():
@@ -276,9 +273,6 @@ class TestRunner(ContextManager['TestRunner']):
description='No qualified output '
f'(expected {f_reference})')
- for p in (f_bad, f_notrun, f_casenotrun):
- silent_unlink(p)
-
args = [str(f_test.resolve())]
env = self.env.prepare_subprocess(args)
if mp:
@@ -288,6 +282,14 @@ class TestRunner(ContextManager['TestRunner']):
env[d] = os.path.join(env[d], f_test.name)
Path(env[d]).mkdir(parents=True, exist_ok=True)
+ test_dir = env['TEST_DIR']
+ f_bad = Path(test_dir, f_test.name + '.out.bad')
+ f_notrun = Path(test_dir, f_test.name + '.notrun')
+ f_casenotrun = Path(test_dir, f_test.name + '.casenotrun')
+
+ for p in (f_notrun, f_casenotrun):
+ silent_unlink(p)
+
t0 = time.time()
with f_bad.open('w', encoding="utf-8") as f:
with subprocess.Popen(args, cwd=str(f_test.parent), env=env,
@@ -365,7 +367,10 @@ class TestRunner(ContextManager['TestRunner']):
description=res.description)
if res.casenotrun:
- print(res.casenotrun)
+ if self.tap:
+ print('#' + res.casenotrun.replace('\n', '\n#'))
+ else:
+ print(res.casenotrun)
return res
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io
new file mode 100755
index 0000000000..567e8cf21e
--- /dev/null
+++ b/tests/qemu-iotests/tests/graph-changes-while-io
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+# group: rw
+#
+# Test graph changes while I/O is happening
+#
+# Copyright (C) 2022 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+from threading import Thread
+import iotests
+from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \
+ QemuStorageDaemon
+
+
+top = os.path.join(iotests.test_dir, 'top.img')
+nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
+
+
+def do_qemu_img_bench() -> None:
+ """
+ Do some I/O requests on `nbd_sock`.
+ """
+ assert qemu_img('bench', '-f', 'raw', '-c', '2000000',
+ f'nbd+unix:///node0?socket={nbd_sock}') == 0
+
+
+class TestGraphChangesWhileIO(QMPTestCase):
+ def setUp(self) -> None:
+ # Create an overlay that can be added at runtime on top of the
+ # null-co block node that will receive I/O
+ assert qemu_img_create('-f', imgfmt, '-F', 'raw', '-b', 'null-co://',
+ top) == 0
+
+ # QSD instance with a null-co block node in an I/O thread,
+ # exported over NBD (on `nbd_sock`, export name "node0")
+ self.qsd = QemuStorageDaemon(
+ '--object', 'iothread,id=iothread0',
+ '--blockdev', 'null-co,node-name=node0,read-zeroes=true',
+ '--nbd-server', f'addr.type=unix,addr.path={nbd_sock}',
+ '--export', 'nbd,id=exp0,node-name=node0,iothread=iothread0,' +
+ 'fixed-iothread=true,writable=true',
+ qmp=True
+ )
+
+ def tearDown(self) -> None:
+ self.qsd.stop()
+
+ def test_blockdev_add_while_io(self) -> None:
+ # Run qemu-img bench in the background
+ bench_thr = Thread(target=do_qemu_img_bench)
+ bench_thr.start()
+
+ # While qemu-img bench is running, repeatedly add and remove an
+ # overlay to/from node0
+ while bench_thr.is_alive():
+ result = self.qsd.qmp('blockdev-add', {
+ 'driver': imgfmt,
+ 'node-name': 'overlay',
+ 'backing': 'node0',
+ 'file': {
+ 'driver': 'file',
+ 'filename': top
+ }
+ })
+ self.assert_qmp(result, 'return', {})
+
+ result = self.qsd.qmp('blockdev-del', {
+ 'node-name': 'overlay'
+ })
+ self.assert_qmp(result, 'return', {})
+
+ bench_thr.join()
+
+if __name__ == '__main__':
+ # Format must support raw backing files
+ iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'],
+ supported_protocols=['file'])
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out
new file mode 100644
index 0000000000..ae1213e6f8
--- /dev/null
+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out
@@ -0,0 +1,5 @@
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK
diff --git a/tests/qemu-iotests/tests/image-fleecing b/tests/qemu-iotests/tests/image-fleecing
index a58b5a1781..c56278639c 100755
--- a/tests/qemu-iotests/tests/image-fleecing
+++ b/tests/qemu-iotests/tests/image-fleecing
@@ -23,12 +23,14 @@
# Creator/Owner: John Snow <jsnow@redhat.com>
import iotests
-from iotests import log, qemu_img, qemu_io, qemu_io_silent
+from iotests import log, qemu_img, qemu_io, qemu_io_silent, \
+ qemu_io_pipe_and_status
iotests.script_initialize(
- supported_fmts=['qcow2', 'qcow', 'qed', 'vmdk', 'vhdx', 'raw'],
+ supported_fmts=['qcow2'],
supported_platforms=['linux'],
required_fmts=['copy-before-write'],
+ unsupported_imgopts=['compat']
)
patterns = [('0x5d', '0', '64k'),
@@ -49,12 +51,30 @@ remainder = [('0xd5', '0x108000', '32k'), # Right-end of partial-left [1]
('0xdc', '32M', '32k'), # Left-end of partial-right [2]
('0xcd', '0x3ff0000', '64k')] # patterns[3]
-def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm):
+def do_test(vm, use_cbw, use_snapshot_access_filter, base_img_path,
+ fleece_img_path, nbd_sock_path=None,
+ target_img_path=None,
+ bitmap=False):
+ push_backup = target_img_path is not None
+ assert (nbd_sock_path is not None) != push_backup
+ if push_backup:
+ assert use_cbw
+
log('--- Setting up images ---')
log('')
assert qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M') == 0
- assert qemu_img('create', '-f', 'qcow2', fleece_img_path, '64M') == 0
+ if bitmap:
+ assert qemu_img('bitmap', '--add', base_img_path, 'bitmap0') == 0
+
+ if use_snapshot_access_filter:
+ assert use_cbw
+ assert qemu_img('create', '-f', 'raw', fleece_img_path, '64M') == 0
+ else:
+ assert qemu_img('create', '-f', 'qcow2', fleece_img_path, '64M') == 0
+
+ if push_backup:
+ assert qemu_img('create', '-f', 'qcow2', target_img_path, '64M') == 0
for p in patterns:
qemu_io('-f', iotests.imgfmt,
@@ -81,27 +101,46 @@ def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm):
log('')
- # create tmp_node backed by src_node
- log(vm.qmp('blockdev-add', {
- 'driver': 'qcow2',
- 'node-name': tmp_node,
- 'file': {
+ if use_snapshot_access_filter:
+ log(vm.qmp('blockdev-add', {
+ 'node-name': tmp_node,
'driver': 'file',
'filename': fleece_img_path,
- },
- 'backing': src_node,
- }))
+ }))
+ else:
+ # create tmp_node backed by src_node
+ log(vm.qmp('blockdev-add', {
+ 'driver': 'qcow2',
+ 'node-name': tmp_node,
+ 'file': {
+ 'driver': 'file',
+ 'filename': fleece_img_path,
+ },
+ 'backing': src_node,
+ }))
# Establish CBW from source to fleecing node
if use_cbw:
- log(vm.qmp('blockdev-add', {
+ fl_cbw = {
'driver': 'copy-before-write',
'node-name': 'fl-cbw',
'file': src_node,
'target': tmp_node
- }))
+ }
+
+ if bitmap:
+ fl_cbw['bitmap'] = {'node': src_node, 'name': 'bitmap0'}
+
+ log(vm.qmp('blockdev-add', fl_cbw))
log(vm.qmp('qom-set', path=qom_path, property='drive', value='fl-cbw'))
+
+ if use_snapshot_access_filter:
+ log(vm.qmp('blockdev-add', {
+ 'driver': 'snapshot-access',
+ 'node-name': 'fl-access',
+ 'file': 'fl-cbw',
+ }))
else:
log(vm.qmp('blockdev-backup',
job_id='fleecing',
@@ -109,25 +148,47 @@ def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm):
target=tmp_node,
sync='none'))
- log('')
- log('--- Setting up NBD Export ---')
- log('')
+ export_node = 'fl-access' if use_snapshot_access_filter else tmp_node
+
+ if push_backup:
+ log('')
+ log('--- Starting actual backup ---')
+ log('')
- nbd_uri = 'nbd+unix:///%s?socket=%s' % (tmp_node, nbd_sock_path)
- log(vm.qmp('nbd-server-start',
- {'addr': {'type': 'unix',
- 'data': {'path': nbd_sock_path}}}))
+ log(vm.qmp('blockdev-add', **{
+ 'driver': iotests.imgfmt,
+ 'node-name': 'target',
+ 'file': {
+ 'driver': 'file',
+ 'filename': target_img_path
+ }
+ }))
+ log(vm.qmp('blockdev-backup', device=export_node,
+ sync='full', target='target',
+ job_id='push-backup', speed=1))
+ else:
+ log('')
+ log('--- Setting up NBD Export ---')
+ log('')
- log(vm.qmp('nbd-server-add', device=tmp_node))
+ nbd_uri = 'nbd+unix:///%s?socket=%s' % (export_node, nbd_sock_path)
+ log(vm.qmp('nbd-server-start',
+ {'addr': { 'type': 'unix',
+ 'data': { 'path': nbd_sock_path } } }))
- log('')
- log('--- Sanity Check ---')
- log('')
+ log(vm.qmp('nbd-server-add', device=export_node))
- for p in patterns + zeroes:
- cmd = 'read -P%s %s %s' % p
- log(cmd)
- assert qemu_io_silent('-r', '-f', 'raw', '-c', cmd, nbd_uri) == 0
+ log('')
+ log('--- Sanity Check ---')
+ log('')
+
+ for p in patterns + zeroes:
+ cmd = 'read -P%s %s %s' % p
+ log(cmd)
+ out, ret = qemu_io_pipe_and_status('-r', '-f', 'raw', '-c', cmd,
+ nbd_uri)
+ if ret != 0:
+ print(out)
log('')
log('--- Testing COW ---')
@@ -138,6 +199,23 @@ def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm):
log(cmd)
log(vm.hmp_qemu_io(qom_path, cmd, qdev=True))
+ if push_backup:
+ # Check that previous operations were done during backup, not after
+ # If backup is already finished, it's possible that it was finished
+ # even before hmp qemu_io write, and we didn't actually test
+ # copy-before-write operation. This should not happen, as we use
+ # speed=1. But worth checking.
+ result = vm.qmp('query-block-jobs')
+ assert len(result['return']) == 1
+
+ result = vm.qmp('block-job-set-speed', device='push-backup', speed=0)
+ assert result == {'return': {}}
+
+ log(vm.event_wait(name='BLOCK_JOB_COMPLETED',
+ match={'data': {'device': 'push-backup'}}),
+ filters=[iotests.filter_qmp_event])
+ log(vm.qmp('blockdev-del', node_name='target'))
+
log('')
log('--- Verifying Data ---')
log('')
@@ -145,13 +223,25 @@ def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm):
for p in patterns + zeroes:
cmd = 'read -P%s %s %s' % p
log(cmd)
- assert qemu_io_silent('-r', '-f', 'raw', '-c', cmd, nbd_uri) == 0
+ args = ['-r', '-c', cmd]
+ if push_backup:
+ args += [target_img_path]
+ else:
+ args += ['-f', 'raw', nbd_uri]
+ out, ret = qemu_io_pipe_and_status(*args)
+ if ret != 0:
+ print(out)
log('')
log('--- Cleanup ---')
log('')
+ if not push_backup:
+ log(vm.qmp('nbd-server-stop'))
+
if use_cbw:
+ if use_snapshot_access_filter:
+ log(vm.qmp('blockdev-del', node_name='fl-access'))
log(vm.qmp('qom-set', path=qom_path, property='drive', value=src_node))
log(vm.qmp('blockdev-del', node_name='fl-cbw'))
else:
@@ -160,7 +250,6 @@ def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm):
assert e is not None
log(e, filters=[iotests.filter_qmp_event])
- log(vm.qmp('nbd-server-stop'))
log(vm.qmp('blockdev-del', node_name=tmp_node))
vm.shutdown()
@@ -177,17 +266,37 @@ def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm):
log('Done')
-def test(use_cbw):
+def test(use_cbw, use_snapshot_access_filter,
+ nbd_sock_path=None, target_img_path=None, bitmap=False):
with iotests.FilePath('base.img') as base_img_path, \
iotests.FilePath('fleece.img') as fleece_img_path, \
- iotests.FilePath('nbd.sock',
- base_dir=iotests.sock_dir) as nbd_sock_path, \
iotests.VM() as vm:
- do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm)
+ do_test(vm, use_cbw, use_snapshot_access_filter, base_img_path,
+ fleece_img_path, nbd_sock_path, target_img_path,
+ bitmap=bitmap)
+
+def test_pull(use_cbw, use_snapshot_access_filter, bitmap=False):
+ with iotests.FilePath('nbd.sock',
+ base_dir=iotests.sock_dir) as nbd_sock_path:
+ test(use_cbw, use_snapshot_access_filter, nbd_sock_path, None,
+ bitmap=bitmap)
+
+def test_push():
+ with iotests.FilePath('target.img') as target_img_path:
+ test(True, True, None, target_img_path)
log('=== Test backup(sync=none) based fleecing ===\n')
-test(False)
+test_pull(False, False)
+
+log('=== Test cbw-filter based fleecing ===\n')
+test_pull(True, False)
+
+log('=== Test fleecing-format based fleecing ===\n')
+test_pull(True, True)
+
+log('=== Test fleecing-format based fleecing with bitmap ===\n')
+test_pull(True, True, bitmap=True)
-log('=== Test filter based fleecing ===\n')
-test(True)
+log('=== Test push backup with fleecing ===\n')
+test_push()
diff --git a/tests/qemu-iotests/tests/image-fleecing.out b/tests/qemu-iotests/tests/image-fleecing.out
index e96d122a8b..acfc89ff0e 100644
--- a/tests/qemu-iotests/tests/image-fleecing.out
+++ b/tests/qemu-iotests/tests/image-fleecing.out
@@ -52,8 +52,150 @@ read -P0 0x3fe0000 64k
--- Cleanup ---
{"return": {}}
+{"return": {}}
{"data": {"device": "fleecing", "len": 67108864, "offset": 393216, "speed": 0, "type": "backup"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
{"return": {}}
+
+--- Confirming writes ---
+
+read -P0xab 0 64k
+read -P0xad 0x00f8000 64k
+read -P0x1d 0x2008000 64k
+read -P0xea 0x3fe0000 64k
+read -P0xd5 0x108000 32k
+read -P0xdc 32M 32k
+read -P0xcd 0x3ff0000 64k
+
+Done
+=== Test cbw-filter based fleecing ===
+
+--- Setting up images ---
+
+Done
+
+--- Launching VM ---
+
+Done
+
+--- Setting up Fleecing Graph ---
+
+{"return": {}}
+{"return": {}}
+{"return": {}}
+
+--- Setting up NBD Export ---
+
+{"return": {}}
+{"return": {}}
+
+--- Sanity Check ---
+
+read -P0x5d 0 64k
+read -P0xd5 1M 64k
+read -P0xdc 32M 64k
+read -P0xcd 0x3ff0000 64k
+read -P0 0x00f8000 32k
+read -P0 0x2010000 32k
+read -P0 0x3fe0000 64k
+
+--- Testing COW ---
+
+write -P0xab 0 64k
+{"return": ""}
+write -P0xad 0x00f8000 64k
+{"return": ""}
+write -P0x1d 0x2008000 64k
+{"return": ""}
+write -P0xea 0x3fe0000 64k
+{"return": ""}
+
+--- Verifying Data ---
+
+read -P0x5d 0 64k
+read -P0xd5 1M 64k
+read -P0xdc 32M 64k
+read -P0xcd 0x3ff0000 64k
+read -P0 0x00f8000 32k
+read -P0 0x2010000 32k
+read -P0 0x3fe0000 64k
+
+--- Cleanup ---
+
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+
+--- Confirming writes ---
+
+read -P0xab 0 64k
+read -P0xad 0x00f8000 64k
+read -P0x1d 0x2008000 64k
+read -P0xea 0x3fe0000 64k
+read -P0xd5 0x108000 32k
+read -P0xdc 32M 32k
+read -P0xcd 0x3ff0000 64k
+
+Done
+=== Test fleecing-format based fleecing ===
+
+--- Setting up images ---
+
+Done
+
+--- Launching VM ---
+
+Done
+
+--- Setting up Fleecing Graph ---
+
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+
+--- Setting up NBD Export ---
+
+{"return": {}}
+{"return": {}}
+
+--- Sanity Check ---
+
+read -P0x5d 0 64k
+read -P0xd5 1M 64k
+read -P0xdc 32M 64k
+read -P0xcd 0x3ff0000 64k
+read -P0 0x00f8000 32k
+read -P0 0x2010000 32k
+read -P0 0x3fe0000 64k
+
+--- Testing COW ---
+
+write -P0xab 0 64k
+{"return": ""}
+write -P0xad 0x00f8000 64k
+{"return": ""}
+write -P0x1d 0x2008000 64k
+{"return": ""}
+write -P0xea 0x3fe0000 64k
+{"return": ""}
+
+--- Verifying Data ---
+
+read -P0x5d 0 64k
+read -P0xd5 1M 64k
+read -P0xdc 32M 64k
+read -P0xcd 0x3ff0000 64k
+read -P0 0x00f8000 32k
+read -P0 0x2010000 32k
+read -P0 0x3fe0000 64k
+
+--- Cleanup ---
+
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
{"return": {}}
--- Confirming writes ---
@@ -67,7 +209,7 @@ read -P0xdc 32M 32k
read -P0xcd 0x3ff0000 64k
Done
-=== Test filter based fleecing ===
+=== Test fleecing-format based fleecing with bitmap ===
--- Setting up images ---
@@ -82,6 +224,7 @@ Done
{"return": {}}
{"return": {}}
{"return": {}}
+{"return": {}}
--- Setting up NBD Export ---
@@ -95,8 +238,82 @@ read -P0xd5 1M 64k
read -P0xdc 32M 64k
read -P0xcd 0x3ff0000 64k
read -P0 0x00f8000 32k
+read failed: Invalid argument
+
+read -P0 0x2010000 32k
+read failed: Invalid argument
+
+read -P0 0x3fe0000 64k
+read failed: Invalid argument
+
+
+--- Testing COW ---
+
+write -P0xab 0 64k
+{"return": ""}
+write -P0xad 0x00f8000 64k
+{"return": ""}
+write -P0x1d 0x2008000 64k
+{"return": ""}
+write -P0xea 0x3fe0000 64k
+{"return": ""}
+
+--- Verifying Data ---
+
+read -P0x5d 0 64k
+read -P0xd5 1M 64k
+read -P0xdc 32M 64k
+read -P0xcd 0x3ff0000 64k
+read -P0 0x00f8000 32k
+read failed: Invalid argument
+
read -P0 0x2010000 32k
+read failed: Invalid argument
+
read -P0 0x3fe0000 64k
+read failed: Invalid argument
+
+
+--- Cleanup ---
+
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+
+--- Confirming writes ---
+
+read -P0xab 0 64k
+read -P0xad 0x00f8000 64k
+read -P0x1d 0x2008000 64k
+read -P0xea 0x3fe0000 64k
+read -P0xd5 0x108000 32k
+read -P0xdc 32M 32k
+read -P0xcd 0x3ff0000 64k
+
+Done
+=== Test push backup with fleecing ===
+
+--- Setting up images ---
+
+Done
+
+--- Launching VM ---
+
+Done
+
+--- Setting up Fleecing Graph ---
+
+{"return": {}}
+{"return": {}}
+{"return": {}}
+{"return": {}}
+
+--- Starting actual backup ---
+
+{"return": {}}
+{"return": {}}
--- Testing COW ---
@@ -108,6 +325,8 @@ write -P0x1d 0x2008000 64k
{"return": ""}
write -P0xea 0x3fe0000 64k
{"return": ""}
+{"data": {"device": "push-backup", "len": 67108864, "offset": 67108864, "speed": 0, "type": "backup"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"return": {}}
--- Verifying Data ---
diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c
index 502e5ad0c7..01ca076afe 100644
--- a/tests/qtest/virtio-9p-test.c
+++ b/tests/qtest/virtio-9p-test.c
@@ -1253,7 +1253,7 @@ static void fs_unlinkat_dir(void *obj, void *data, QGuestAllocator *t_alloc)
/* ... and is actually a directory */
g_assert((st.st_mode & S_IFMT) == S_IFDIR);
- do_unlinkat(v9p, "/", "02", AT_REMOVEDIR);
+ do_unlinkat(v9p, "/", "02", P9_DOTL_AT_REMOVEDIR);
/* directory should be gone now */
g_assert(stat(new_dir, &st) != 0);
}
diff --git a/tests/tcg/configure.sh b/tests/tcg/configure.sh
index 0663bd19f4..ed4b5ccb1f 100755
--- a/tests/tcg/configure.sh
+++ b/tests/tcg/configure.sh
@@ -64,9 +64,9 @@ fi
: ${cross_cc_ppc="powerpc-linux-gnu-gcc"}
: ${cross_cc_cflags_ppc="-m32"}
: ${cross_cc_ppc64="powerpc64-linux-gnu-gcc"}
-: ${cross_cc_cflags_ppc64="-m64 -mbig"}
+: ${cross_cc_cflags_ppc64="-m64 -mbig-endian"}
: ${cross_cc_ppc64le="$cross_cc_ppc64"}
-: ${cross_cc_cflags_ppc64le="-m64 -mlittle"}
+: ${cross_cc_cflags_ppc64le="-m64 -mlittle-endian"}
: ${cross_cc_riscv64="riscv64-linux-gnu-gcc"}
: ${cross_cc_s390x="s390x-linux-gnu-gcc"}
: ${cross_cc_sh4="sh4-linux-gnu-gcc"}
diff --git a/tests/tcg/ppc64le/bcdsub.c b/tests/tcg/ppc64le/bcdsub.c
index 8c188cae6d..87c8c44a44 100644
--- a/tests/tcg/ppc64le/bcdsub.c
+++ b/tests/tcg/ppc64le/bcdsub.c
@@ -1,6 +1,7 @@
#include <assert.h>
#include <unistd.h>
#include <signal.h>
+#include <stdint.h>
#define CRF_LT (1 << 3)
#define CRF_GT (1 << 2)
@@ -8,24 +9,50 @@
#define CRF_SO (1 << 0)
#define UNDEF 0
-#define BCDSUB(vra, vrb, ps) \
- asm ("bcdsub. %1,%2,%3,%4;" \
- "mfocrf %0,0b10;" \
- : "=r" (cr), "=v" (vrt) \
- : "v" (vra), "v" (vrb), "i" (ps) \
- : );
-
-#define TEST(vra, vrb, ps, exp_res, exp_cr6) \
- do { \
- __int128 vrt = 0; \
- int cr = 0; \
- BCDSUB(vra, vrb, ps); \
- if (exp_res) \
- assert(vrt == exp_res); \
- assert((cr >> 4) == exp_cr6); \
+#ifdef __has_builtin
+#if !__has_builtin(__builtin_bcdsub)
+#define NO_BUILTIN_BCDSUB
+#endif
+#endif
+
+#ifdef NO_BUILTIN_BCDSUB
+#define BCDSUB(T, A, B, PS) \
+ ".long 4 << 26 | (" #T ") << 21 | (" #A ") << 16 | (" #B ") << 11" \
+ " | 1 << 10 | (" #PS ") << 9 | 65\n\t"
+#else
+#define BCDSUB(T, A, B, PS) "bcdsub. " #T ", " #A ", " #B ", " #PS "\n\t"
+#endif
+
+#define TEST(AH, AL, BH, BL, PS, TH, TL, CR6) \
+ do { \
+ int cr = 0; \
+ uint64_t th, tl; \
+ /* \
+ * Use GPR pairs to load the VSR values and place the resulting VSR and\
+ * CR6 in th, tl, and cr. Note that we avoid newer instructions (e.g., \
+ * mtvsrdd/mfvsrld) so we can run this test on POWER8 machines. \
+ */ \
+ asm ("mtvsrd 32, %3\n\t" \
+ "mtvsrd 33, %4\n\t" \
+ "xxmrghd 32, 32, 33\n\t" \
+ "mtvsrd 33, %5\n\t" \
+ "mtvsrd 34, %6\n\t" \
+ "xxmrghd 33, 33, 34\n\t" \
+ BCDSUB(0, 0, 1, PS) \
+ "mfocrf %0, 0b10\n\t" \
+ "mfvsrd %1, 32\n\t" \
+ "xxswapd 32, 32\n\t" \
+ "mfvsrd %2, 32\n\t" \
+ : "=r" (cr), "=r" (th), "=r" (tl) \
+ : "r" (AH), "r" (AL), "r" (BH), "r" (BL) \
+ : "v0", "v1", "v2"); \
+ if (TH != UNDEF || TL != UNDEF) { \
+ assert(tl == TL); \
+ assert(th == TH); \
+ } \
+ assert((cr >> 4) == CR6); \
} while (0)
-
/*
* Unbounded result is equal to zero:
* sign = (PS) ? 0b1111 : 0b1100
@@ -33,13 +60,13 @@
*/
void test_bcdsub_eq(void)
{
- __int128 a, b;
-
/* maximum positive BCD value */
- a = b = (((__int128) 0x9999999999999999) << 64 | 0x999999999999999c);
-
- TEST(a, b, 0, 0xc, CRF_EQ);
- TEST(a, b, 1, 0xf, CRF_EQ);
+ TEST(0x9999999999999999, 0x999999999999999c,
+ 0x9999999999999999, 0x999999999999999c,
+ 0, 0x0, 0xc, CRF_EQ);
+ TEST(0x9999999999999999, 0x999999999999999c,
+ 0x9999999999999999, 0x999999999999999c,
+ 1, 0x0, 0xf, CRF_EQ);
}
/*
@@ -49,21 +76,16 @@ void test_bcdsub_eq(void)
*/
void test_bcdsub_gt(void)
{
- __int128 a, b, c;
-
- /* maximum positive BCD value */
- a = (((__int128) 0x9999999999999999) << 64 | 0x999999999999999c);
-
- /* negative one BCD value */
- b = (__int128) 0x1d;
-
- TEST(a, b, 0, 0xc, (CRF_GT | CRF_SO));
- TEST(a, b, 1, 0xf, (CRF_GT | CRF_SO));
-
- c = (((__int128) 0x9999999999999999) << 64 | 0x999999999999998c);
-
- TEST(c, b, 0, a, CRF_GT);
- TEST(c, b, 1, (a | 0x3), CRF_GT);
+ /* maximum positive and negative one BCD values */
+ TEST(0x9999999999999999, 0x999999999999999c, 0x0, 0x1d, 0,
+ 0x0, 0xc, (CRF_GT | CRF_SO));
+ TEST(0x9999999999999999, 0x999999999999999c, 0x0, 0x1d, 1,
+ 0x0, 0xf, (CRF_GT | CRF_SO));
+
+ TEST(0x9999999999999999, 0x999999999999998c, 0x0, 0x1d, 0,
+ 0x9999999999999999, 0x999999999999999c, CRF_GT);
+ TEST(0x9999999999999999, 0x999999999999998c, 0x0, 0x1d, 1,
+ 0x9999999999999999, 0x999999999999999f, CRF_GT);
}
/*
@@ -73,45 +95,27 @@ void test_bcdsub_gt(void)
*/
void test_bcdsub_lt(void)
{
- __int128 a, b;
-
- /* positive zero BCD value */
- a = (__int128) 0xc;
-
- /* positive one BCD value */
- b = (__int128) 0x1c;
-
- TEST(a, b, 0, 0x1d, CRF_LT);
- TEST(a, b, 1, 0x1d, CRF_LT);
-
- /* maximum negative BCD value */
- a = (((__int128) 0x9999999999999999) << 64 | 0x999999999999999d);
-
- /* positive one BCD value */
- b = (__int128) 0x1c;
-
- TEST(a, b, 0, 0xd, (CRF_LT | CRF_SO));
- TEST(a, b, 1, 0xd, (CRF_LT | CRF_SO));
+ /* positive zero and positive one BCD values */
+ TEST(0x0, 0xc, 0x0, 0x1c, 0, 0x0, 0x1d, CRF_LT);
+ TEST(0x0, 0xc, 0x0, 0x1c, 1, 0x0, 0x1d, CRF_LT);
+
+ /* maximum negative and positive one BCD values */
+ TEST(0x9999999999999999, 0x999999999999999d, 0x0, 0x1c, 0,
+ 0x0, 0xd, (CRF_LT | CRF_SO));
+ TEST(0x9999999999999999, 0x999999999999999d, 0x0, 0x1c, 1,
+ 0x0, 0xd, (CRF_LT | CRF_SO));
}
void test_bcdsub_invalid(void)
{
- __int128 a, b;
-
- /* positive one BCD value */
- a = (__int128) 0x1c;
- b = 0xf00;
-
- TEST(a, b, 0, UNDEF, CRF_SO);
- TEST(a, b, 1, UNDEF, CRF_SO);
-
- TEST(b, a, 0, UNDEF, CRF_SO);
- TEST(b, a, 1, UNDEF, CRF_SO);
+ TEST(0x0, 0x1c, 0x0, 0xf00, 0, UNDEF, UNDEF, CRF_SO);
+ TEST(0x0, 0x1c, 0x0, 0xf00, 1, UNDEF, UNDEF, CRF_SO);
- a = 0xbad;
+ TEST(0x0, 0xf00, 0x0, 0x1c, 0, UNDEF, UNDEF, CRF_SO);
+ TEST(0x0, 0xf00, 0x0, 0x1c, 1, UNDEF, UNDEF, CRF_SO);
- TEST(a, b, 0, UNDEF, CRF_SO);
- TEST(a, b, 1, UNDEF, CRF_SO);
+ TEST(0x0, 0xbad, 0x0, 0xf00, 0, UNDEF, UNDEF, CRF_SO);
+ TEST(0x0, 0xbad, 0x0, 0xf00, 1, UNDEF, UNDEF, CRF_SO);
}
int main(void)
diff --git a/tests/tcg/ppc64le/mtfsf.c b/tests/tcg/ppc64le/mtfsf.c
index b3d31f3637..bed5b1afa4 100644
--- a/tests/tcg/ppc64le/mtfsf.c
+++ b/tests/tcg/ppc64le/mtfsf.c
@@ -1,8 +1,12 @@
#include <stdlib.h>
+#include <stdint.h>
#include <assert.h>
#include <signal.h>
#include <sys/prctl.h>
+#define MTFSF(FLM, FRB) asm volatile ("mtfsf %0, %1" :: "i" (FLM), "f" (FRB))
+#define MFFS(FRT) asm("mffs %0" : "=f" (FRT))
+
#define FPSCR_VE 7 /* Floating-point invalid operation exception enable */
#define FPSCR_VXSOFT 10 /* Floating-point invalid operation exception (soft) */
#define FPSCR_FI 17 /* Floating-point fraction inexact */
@@ -21,10 +25,7 @@ void sigfpe_handler(int sig, siginfo_t *si, void *ucontext)
int main(void)
{
- union {
- double d;
- long long ll;
- } fpscr;
+ uint64_t fpscr;
struct sigaction sa = {
.sa_sigaction = sigfpe_handler,
@@ -40,10 +41,9 @@ int main(void)
prctl(PR_SET_FPEXC, PR_FP_EXC_PRECISE);
/* First test if the FI bit is being set correctly */
- fpscr.ll = FP_FI;
- __builtin_mtfsf(0b11111111, fpscr.d);
- fpscr.d = __builtin_mffs();
- assert((fpscr.ll & FP_FI) != 0);
+ MTFSF(0b11111111, FP_FI);
+ MFFS(fpscr);
+ assert((fpscr & FP_FI) != 0);
/* Then test if the deferred exception is being called correctly */
sigaction(SIGFPE, &sa, NULL);
@@ -54,8 +54,7 @@ int main(void)
* But if a different exception is chosen si_code check should
* change accordingly.
*/
- fpscr.ll = FP_VE | FP_VXSOFT;
- __builtin_mtfsf(0b11111111, fpscr.d);
+ MTFSF(0b11111111, FP_VE | FP_VXSOFT);
return 1;
}
diff --git a/tests/tcg/ppc64le/non_signalling_xscv.c b/tests/tcg/ppc64le/non_signalling_xscv.c
index 91e25cad46..836df71ef0 100644
--- a/tests/tcg/ppc64le/non_signalling_xscv.c
+++ b/tests/tcg/ppc64le/non_signalling_xscv.c
@@ -6,16 +6,16 @@
#define TEST(INSN, B_HI, B_LO, T_HI, T_LO) \
do { \
uint64_t th, tl, bh = B_HI, bl = B_LO; \
- asm("mtvsrd 0, %2\n\t" \
- "mtvsrd 1, %3\n\t" \
- "xxmrghd 0, 0, 1\n\t" \
- INSN " 0, 0\n\t" \
- "mfvsrd %0, 0\n\t" \
- "xxswapd 0, 0\n\t" \
- "mfvsrd %1, 0\n\t" \
+ asm("mtvsrd 32, %2\n\t" \
+ "mtvsrd 33, %3\n\t" \
+ "xxmrghd 32, 32, 33\n\t" \
+ INSN " 32, 32\n\t" \
+ "mfvsrd %0, 32\n\t" \
+ "xxswapd 32, 32\n\t" \
+ "mfvsrd %1, 32\n\t" \
: "=r" (th), "=r" (tl) \
: "r" (bh), "r" (bl) \
- : "vs0", "vs1"); \
+ : "v0", "v1"); \
printf(INSN "(0x%016" PRIx64 "%016" PRIx64 ") = 0x%016" PRIx64 \
"%016" PRIx64 "\n", bh, bl, th, tl); \
assert(th == T_HI && tl == T_LO); \
diff --git a/tests/tcg/s390x/exrl-trt.c b/tests/tcg/s390x/exrl-trt.c
index 16711a3181..451f777b9d 100644
--- a/tests/tcg/s390x/exrl-trt.c
+++ b/tests/tcg/s390x/exrl-trt.c
@@ -5,8 +5,8 @@ int main(void)
{
char op1[] = "hello";
char op2[256];
- uint64_t r1 = 0xffffffffffffffffull;
- uint64_t r2 = 0xffffffffffffffffull;
+ register uint64_t r1 asm("r1") = 0xffffffffffffffffull;
+ register uint64_t r2 asm("r2") = 0xffffffffffffffffull;
uint64_t cc;
int i;
@@ -21,8 +21,6 @@ int main(void)
" j 2f\n"
"1: trt 0(1,%[op1]),%[op2]\n"
"2: exrl %[op1_len],1b\n"
- " lgr %[r1],%%r1\n"
- " lgr %[r2],%%r2\n"
" ipm %[cc]\n"
: [r1] "+r" (r1),
[r2] "+r" (r2),
@@ -30,7 +28,7 @@ int main(void)
: [op1] "a" (&op1),
[op1_len] "a" (5),
[op2] "Q" (op2)
- : "r1", "r2", "cc");
+ : "cc");
cc = (cc >> 28) & 3;
if (cc != 2) {
write(1, "bad cc\n", 7);
diff --git a/tests/tcg/s390x/exrl-trtr.c b/tests/tcg/s390x/exrl-trtr.c
index 5f30cda6bd..422f7f385a 100644
--- a/tests/tcg/s390x/exrl-trtr.c
+++ b/tests/tcg/s390x/exrl-trtr.c
@@ -5,8 +5,8 @@ int main(void)
{
char op1[] = {0, 1, 2, 3};
char op2[256];
- uint64_t r1 = 0xffffffffffffffffull;
- uint64_t r2 = 0xffffffffffffffffull;
+ register uint64_t r1 asm("r1") = 0xffffffffffffffffull;
+ register uint64_t r2 asm("r2") = 0xffffffffffffffffull;
uint64_t cc;
int i;
@@ -21,8 +21,6 @@ int main(void)
" j 2f\n"
"1: trtr 3(1,%[op1]),%[op2]\n"
"2: exrl %[op1_len],1b\n"
- " lgr %[r1],%%r1\n"
- " lgr %[r2],%%r2\n"
" ipm %[cc]\n"
: [r1] "+r" (r1),
[r2] "+r" (r2),
@@ -30,7 +28,7 @@ int main(void)
: [op1] "a" (&op1),
[op1_len] "a" (3),
[op2] "Q" (op2)
- : "r1", "r2", "cc");
+ : "cc");
cc = (cc >> 28) & 3;
if (cc != 1) {
write(1, "bad cc\n", 7);
diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c
index 57b08e48d0..93c7b0a290 100644
--- a/tests/tcg/s390x/mie3-mvcrl.c
+++ b/tests/tcg/s390x/mie3-mvcrl.c
@@ -1,15 +1,17 @@
#include <stdint.h>
#include <string.h>
+
static inline void mvcrl_8(const char *dst, const char *src)
{
asm volatile (
- "llill %%r0, 8\n"
- ".insn sse, 0xE50A00000000, 0(%[dst]), 0(%[src])"
- : : [dst] "d" (dst), [src] "d" (src)
- : "memory");
+ "llill %%r0, 8\n"
+ ".insn sse, 0xE50A00000000, 0(%[dst]), 0(%[src])"
+ : : [dst] "d" (dst), [src] "d" (src)
+ : "r0", "memory");
}
+
int main(int argc, char *argv[])
{
const char *alpha = "abcdefghijklmnop";
diff --git a/tests/tcg/s390x/mie3-sel.c b/tests/tcg/s390x/mie3-sel.c
index b0c5c9857d..0dfd532ed4 100644
--- a/tests/tcg/s390x/mie3-sel.c
+++ b/tests/tcg/s390x/mie3-sel.c
@@ -1,32 +1,27 @@
#include <stdint.h>
+
#define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \
-{ \
- uint64_t res = 0; \
- asm ( \
- "lg %%r2, %[a]\n" \
- "lg %%r3, %[b]\n" \
- "lg %%r0, %[c]\n" \
- "ltgr %%r0, %%r0\n" \
- ASM \
- "stg %%r0, %[res] " \
- : [res] "=m" (res) \
- : [a] "m" (a), \
- [b] "m" (b), \
- [c] "m" (c) \
- : "r0", "r2", \
- "r3", "r4" \
- ); \
- return res; \
+{ \
+asm volatile ( \
+ "ltgr %[c], %[c]\n" \
+ ASM \
+ : [c] "+r" (c) \
+ : [a] "r" (a) \
+ , [b] "r" (b) \
+); \
+ return c; \
}
-Fi3 (_selre, ".insn rrf, 0xB9F00000, %%r0, %%r3, %%r2, 8\n")
-Fi3 (_selgrz, ".insn rrf, 0xB9E30000, %%r0, %%r3, %%r2, 8\n")
-Fi3 (_selfhrnz, ".insn rrf, 0xB9C00000, %%r0, %%r3, %%r2, 7\n")
+Fi3 (_selre, ".insn rrf, 0xB9F00000, %[c], %[b], %[a], 8\n")
+Fi3 (_selgrz, ".insn rrf, 0xB9E30000, %[c], %[b], %[a], 8\n")
+Fi3 (_selfhrnz, ".insn rrf, 0xB9C00000, %[c], %[b], %[a], 7\n")
+
int main(int argc, char *argv[])
{
uint64_t a = ~0, b = ~0, c = ~0;
+
a = _selre(0x066600000066ull, 0x066600000006ull, a);
b = _selgrz(0xF00D00000005ull, 0xF00D00000055ull, b);
c = _selfhrnz(0x043200000044ull, 0x065400000004ull, c);
diff --git a/tests/tcg/s390x/mvc.c b/tests/tcg/s390x/mvc.c
index aa552d52e5..7ae4c44550 100644
--- a/tests/tcg/s390x/mvc.c
+++ b/tests/tcg/s390x/mvc.c
@@ -20,8 +20,8 @@ static inline void mvc_256(const char *dst, const char *src)
asm volatile (
" mvc 0(256,%[dst]),0(%[src])\n"
:
- : [dst] "d" (dst),
- [src] "d" (src)
+ : [dst] "a" (dst),
+ [src] "a" (src)
: "memory");
}
diff --git a/tests/tcg/s390x/mvo.c b/tests/tcg/s390x/mvo.c
index 5546fe2a97..0c3ecdde2e 100644
--- a/tests/tcg/s390x/mvo.c
+++ b/tests/tcg/s390x/mvo.c
@@ -11,8 +11,8 @@ int main(void)
asm volatile (
" mvo 0(4,%[dest]),0(3,%[src])\n"
:
- : [dest] "d" (dest + 1),
- [src] "d" (src + 1)
+ : [dest] "a" (dest + 1),
+ [src] "a" (src + 1)
: "memory");
for (i = 0; i < sizeof(expected); i++) {
diff --git a/tests/tcg/s390x/pack.c b/tests/tcg/s390x/pack.c
index 4be36f29a7..55e7e214e8 100644
--- a/tests/tcg/s390x/pack.c
+++ b/tests/tcg/s390x/pack.c
@@ -9,7 +9,7 @@ int main(void)
asm volatile(
" pack 2(4,%[data]),2(4,%[data])\n"
:
- : [data] "r" (&data[0])
+ : [data] "a" (&data[0])
: "memory");
for (i = 0; i < 8; i++) {
if (data[i] != exp[i]) {
diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c
index 2a3ef58799..f5e75a96b6 100644
--- a/tests/unit/ptimer-test-stubs.c
+++ b/tests/unit/ptimer-test-stubs.c
@@ -12,7 +12,6 @@
#include "qemu/main-loop.h"
#include "sysemu/replay.h"
#include "migration/vmstate.h"
-#include "sysemu/cpu-timers.h"
#include "ptimer-test.h"
diff --git a/tests/unit/rcutorture.c b/tests/unit/rcutorture.c
index de6f649058..495a4e6f42 100644
--- a/tests/unit/rcutorture.c
+++ b/tests/unit/rcutorture.c
@@ -122,7 +122,7 @@ static void *rcu_read_perf_test(void *arg)
rcu_register_thread();
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
@@ -148,7 +148,7 @@ static void *rcu_update_perf_test(void *arg)
rcu_register_thread();
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
@@ -253,7 +253,7 @@ static void *rcu_read_stress_test(void *arg)
rcu_register_thread();
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
}
@@ -304,7 +304,7 @@ static void *rcu_update_stress_test(void *arg)
struct rcu_stress *cp = qatomic_read(&rcu_stress_current);
rcu_register_thread();
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
@@ -347,7 +347,7 @@ static void *rcu_fake_update_stress_test(void *arg)
{
rcu_register_thread();
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
}
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index aea660aeed..94718c9319 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -279,10 +279,10 @@ static void test_sync_op_check(BdrvChild *c)
g_assert_cmpint(ret, ==, -ENOTSUP);
}
-static void test_sync_op_invalidate_cache(BdrvChild *c)
+static void test_sync_op_activate(BdrvChild *c)
{
/* Early success: Image is not inactive */
- bdrv_invalidate_cache(c->bs, NULL);
+ bdrv_activate(c->bs, NULL);
}
@@ -325,8 +325,8 @@ const SyncOpTest sync_op_tests[] = {
.name = "/sync-op/check",
.fn = test_sync_op_check,
}, {
- .name = "/sync-op/invalidate_cache",
- .fn = test_sync_op_invalidate_cache,
+ .name = "/sync-op/activate",
+ .fn = test_sync_op_activate,
},
};
diff --git a/tests/unit/test-rcu-list.c b/tests/unit/test-rcu-list.c
index 49641e1936..64b81ae058 100644
--- a/tests/unit/test-rcu-list.c
+++ b/tests/unit/test-rcu-list.c
@@ -171,7 +171,7 @@ static void *rcu_q_reader(void *arg)
rcu_register_thread();
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (qatomic_read(&goflag) == GOFLAG_INIT) {
g_usleep(1000);
@@ -206,7 +206,7 @@ static void *rcu_q_updater(void *arg)
long long n_removed_local = 0;
struct list_element *el, *prev_el;
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (qatomic_read(&goflag) == GOFLAG_INIT) {
g_usleep(1000);
diff --git a/tests/vm/haiku.x86_64 b/tests/vm/haiku.x86_64
index 2eb736dae1..936f7d2ae2 100755
--- a/tests/vm/haiku.x86_64
+++ b/tests/vm/haiku.x86_64
@@ -2,7 +2,7 @@
#
# Haiku VM image
#
-# Copyright 2020 Haiku, Inc.
+# Copyright 2020-2022 Haiku, Inc.
#
# Authors:
# Alexander von Gluck IV <kallisti5@unixzen.com>
@@ -48,8 +48,8 @@ class HaikuVM(basevm.BaseVM):
name = "haiku"
arch = "x86_64"
- link = "https://app.vagrantup.com/haiku-os/boxes/r1beta2-x86_64/versions/20200702/providers/libvirt.box"
- csum = "41c38b316e0cbdbc66b5dbaf3612b866700a4f35807cb1eb266a5bf83e9e68d5"
+ link = "https://app.vagrantup.com/haiku-os/boxes/r1beta3-x86_64/versions/20220216/providers/libvirt.box"
+ csum = "e67d4aacbcc687013d5cc91990ddd86cc5d70a5d28432ae2691944f8ce5d5041"
poweroff = "shutdown"
@@ -99,7 +99,7 @@ class HaikuVM(basevm.BaseVM):
self.print_step("Extracting disk image")
- subprocess.check_call(["tar", "xzf", tarball, "./box.img", "-O"],
+ subprocess.check_call(["tar", "xzf", tarball, "box.img", "-O"],
stdout=open(img, 'wb'))
self.print_step("Preparing disk image")
diff --git a/ui/clipboard.c b/ui/clipboard.c
index 5f15cf853d..9079ef829b 100644
--- a/ui/clipboard.c
+++ b/ui/clipboard.c
@@ -66,8 +66,10 @@ void qemu_clipboard_update(QemuClipboardInfo *info)
notifier_list_notify(&clipboard_notifiers, &notify);
- qemu_clipboard_info_unref(cbinfo[info->selection]);
- cbinfo[info->selection] = qemu_clipboard_info_ref(info);
+ if (cbinfo[info->selection] != info) {
+ qemu_clipboard_info_unref(cbinfo[info->selection]);
+ cbinfo[info->selection] = qemu_clipboard_info_ref(info);
+ }
}
QemuClipboardInfo *qemu_clipboard_info(QemuClipboardSelection selection)
diff --git a/ui/cocoa.m b/ui/cocoa.m
index b6e70e9134..c88149852b 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -83,7 +83,7 @@ static void cocoa_switch(DisplayChangeListener *dcl,
static void cocoa_refresh(DisplayChangeListener *dcl);
-static NSWindow *normalWindow, *about_window;
+static NSWindow *normalWindow;
static const DisplayChangeListenerOps dcl_ops = {
.dpy_name = "cocoa",
.dpy_gfx_update = cocoa_update,
@@ -1140,7 +1140,6 @@ QemuCocoaView *cocoaView;
- (BOOL)verifyQuit;
- (void)openDocumentation:(NSString *)filename;
- (IBAction) do_about_menu_item: (id) sender;
-- (void)make_about_window;
- (void)adjustSpeed:(id)sender;
@end
@@ -1186,8 +1185,6 @@ QemuCocoaView *cocoaView;
[pauseLabel setFont: [NSFont fontWithName: @"Helvetica" size: 90]];
[pauseLabel setTextColor: [NSColor blackColor]];
[pauseLabel sizeToFit];
-
- [self make_about_window];
}
return self;
}
@@ -1471,92 +1468,29 @@ QemuCocoaView *cocoaView;
/* The action method for the About menu item */
- (IBAction) do_about_menu_item: (id) sender
{
- [about_window makeKeyAndOrderFront: nil];
-}
-
-/* Create and display the about dialog */
-- (void)make_about_window
-{
- /* Make the window */
- int x = 0, y = 0, about_width = 400, about_height = 200;
- NSRect window_rect = NSMakeRect(x, y, about_width, about_height);
- about_window = [[NSWindow alloc] initWithContentRect:window_rect
- styleMask:NSWindowStyleMaskTitled | NSWindowStyleMaskClosable |
- NSWindowStyleMaskMiniaturizable
- backing:NSBackingStoreBuffered
- defer:NO];
- [about_window setTitle: @"About"];
- [about_window setReleasedWhenClosed: NO];
- [about_window center];
- NSView *superView = [about_window contentView];
-
- /* Create the dimensions of the picture */
- int picture_width = 80, picture_height = 80;
- x = (about_width - picture_width)/2;
- y = about_height - picture_height - 10;
- NSRect picture_rect = NSMakeRect(x, y, picture_width, picture_height);
-
- /* Make the picture of QEMU */
- NSImageView *picture_view = [[NSImageView alloc] initWithFrame:
- picture_rect];
- char *qemu_image_path_c = get_relocated_path(CONFIG_QEMU_ICONDIR "/hicolor/512x512/apps/qemu.png");
- NSString *qemu_image_path = [NSString stringWithUTF8String:qemu_image_path_c];
- g_free(qemu_image_path_c);
- NSImage *qemu_image = [[NSImage alloc] initWithContentsOfFile:qemu_image_path];
- [picture_view setImage: qemu_image];
- [picture_view setImageScaling: NSImageScaleProportionallyUpOrDown];
- [superView addSubview: picture_view];
-
- /* Make the name label */
- NSBundle *bundle = [NSBundle mainBundle];
- if (bundle) {
- x = 0;
- y = y - 25;
- int name_width = about_width, name_height = 20;
- NSRect name_rect = NSMakeRect(x, y, name_width, name_height);
- NSTextField *name_label = [[NSTextField alloc] initWithFrame: name_rect];
- [name_label setEditable: NO];
- [name_label setBezeled: NO];
- [name_label setDrawsBackground: NO];
- [name_label setAlignment: NSTextAlignmentCenter];
- NSString *qemu_name = [[bundle executablePath] lastPathComponent];
- [name_label setStringValue: qemu_name];
- [superView addSubview: name_label];
+ NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
+ char *icon_path_c = get_relocated_path(CONFIG_QEMU_ICONDIR "/hicolor/512x512/apps/qemu.png");
+ NSString *icon_path = [NSString stringWithUTF8String:icon_path_c];
+ g_free(icon_path_c);
+ NSImage *icon = [[NSImage alloc] initWithContentsOfFile:icon_path];
+ NSString *version = @"QEMU emulator version " QEMU_FULL_VERSION;
+ NSString *copyright = @QEMU_COPYRIGHT;
+ NSDictionary *options;
+ if (icon) {
+ options = @{
+ NSAboutPanelOptionApplicationIcon : icon,
+ NSAboutPanelOptionApplicationVersion : version,
+ @"Copyright" : copyright,
+ };
+ [icon release];
+ } else {
+ options = @{
+ NSAboutPanelOptionApplicationVersion : version,
+ @"Copyright" : copyright,
+ };
}
-
- /* Set the version label's attributes */
- x = 0;
- y = 50;
- int version_width = about_width, version_height = 20;
- NSRect version_rect = NSMakeRect(x, y, version_width, version_height);
- NSTextField *version_label = [[NSTextField alloc] initWithFrame:
- version_rect];
- [version_label setEditable: NO];
- [version_label setBezeled: NO];
- [version_label setAlignment: NSTextAlignmentCenter];
- [version_label setDrawsBackground: NO];
-
- /* Create the version string*/
- NSString *version_string;
- version_string = [[NSString alloc] initWithFormat:
- @"QEMU emulator version %s", QEMU_FULL_VERSION];
- [version_label setStringValue: version_string];
- [superView addSubview: version_label];
-
- /* Make copyright label */
- x = 0;
- y = 35;
- int copyright_width = about_width, copyright_height = 20;
- NSRect copyright_rect = NSMakeRect(x, y, copyright_width, copyright_height);
- NSTextField *copyright_label = [[NSTextField alloc] initWithFrame:
- copyright_rect];
- [copyright_label setEditable: NO];
- [copyright_label setBezeled: NO];
- [copyright_label setDrawsBackground: NO];
- [copyright_label setAlignment: NSTextAlignmentCenter];
- [copyright_label setStringValue: [NSString stringWithFormat: @"%s",
- QEMU_COPYRIGHT]];
- [superView addSubview: copyright_label];
+ [NSApp orderFrontStandardAboutPanelWithOptions:options];
+ [pool release];
}
/* Used by the Speed menu items */
@@ -1611,11 +1545,15 @@ static void create_initial_menus(void)
NSMenuItem *menuItem;
[NSApp setMainMenu:[[NSMenu alloc] init]];
+ [NSApp setServicesMenu:[[NSMenu alloc] initWithTitle:@"Services"]];
// Application menu
menu = [[NSMenu alloc] initWithTitle:@""];
[menu addItemWithTitle:@"About QEMU" action:@selector(do_about_menu_item:) keyEquivalent:@""]; // About QEMU
[menu addItem:[NSMenuItem separatorItem]]; //Separator
+ menuItem = [menu addItemWithTitle:@"Services" action:nil keyEquivalent:@""];
+ [menuItem setSubmenu:[NSApp servicesMenu]];
+ [menu addItem:[NSMenuItem separatorItem]];
[menu addItemWithTitle:@"Hide QEMU" action:@selector(hide:) keyEquivalent:@"h"]; //Hide QEMU
menuItem = (NSMenuItem *)[menu addItemWithTitle:@"Hide Others" action:@selector(hideOtherApplications:) keyEquivalent:@"h"]; // Hide Others
[menuItem setKeyEquivalentModifierMask:(NSEventModifierFlagOption|NSEventModifierFlagCommand)];
diff --git a/ui/console-gl.c b/ui/console-gl.c
index 7c9894a51d..8e3c9a3c8c 100644
--- a/ui/console-gl.c
+++ b/ui/console-gl.c
@@ -49,6 +49,10 @@ void surface_gl_create_texture(QemuGLShader *gls,
assert(gls);
assert(QEMU_IS_ALIGNED(surface_stride(surface), surface_bytes_per_pixel(surface)));
+ if (surface->texture) {
+ return;
+ }
+
switch (surface->format) {
case PIXMAN_BE_b8g8r8x8:
case PIXMAN_BE_b8g8r8a8:
diff --git a/ui/console.c b/ui/console.c
index 40eebb6d2c..365a2c14b8 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1860,7 +1860,9 @@ void dpy_gl_scanout_disable(QemuConsole *con)
con->scanout.kind = SCANOUT_NONE;
}
QLIST_FOREACH(dcl, &s->listeners, next) {
- dcl->ops->dpy_gl_scanout_disable(dcl);
+ if (dcl->ops->dpy_gl_scanout_disable) {
+ dcl->ops->dpy_gl_scanout_disable(dcl);
+ }
}
}
@@ -1881,10 +1883,12 @@ void dpy_gl_scanout_texture(QemuConsole *con,
x, y, width, height
};
QLIST_FOREACH(dcl, &s->listeners, next) {
- dcl->ops->dpy_gl_scanout_texture(dcl, backing_id,
- backing_y_0_top,
- backing_width, backing_height,
- x, y, width, height);
+ if (dcl->ops->dpy_gl_scanout_texture) {
+ dcl->ops->dpy_gl_scanout_texture(dcl, backing_id,
+ backing_y_0_top,
+ backing_width, backing_height,
+ x, y, width, height);
+ }
}
}
@@ -1897,7 +1901,9 @@ void dpy_gl_scanout_dmabuf(QemuConsole *con,
con->scanout.kind = SCANOUT_DMABUF;
con->scanout.dmabuf = dmabuf;
QLIST_FOREACH(dcl, &s->listeners, next) {
- dcl->ops->dpy_gl_scanout_dmabuf(dcl, dmabuf);
+ if (dcl->ops->dpy_gl_scanout_dmabuf) {
+ dcl->ops->dpy_gl_scanout_dmabuf(dcl, dmabuf);
+ }
}
}
@@ -1951,7 +1957,9 @@ void dpy_gl_update(QemuConsole *con,
graphic_hw_gl_block(con, true);
QLIST_FOREACH(dcl, &s->listeners, next) {
- dcl->ops->dpy_gl_update(dcl, x, y, w, h);
+ if (dcl->ops->dpy_gl_update) {
+ dcl->ops->dpy_gl_update(dcl, x, y, w, h);
+ }
}
graphic_hw_gl_block(con, false);
}
@@ -2392,13 +2400,12 @@ static void vc_chr_open(Chardev *chr,
void qemu_console_resize(QemuConsole *s, int width, int height)
{
- DisplaySurface *surface = qemu_console_surface(s);
+ DisplaySurface *surface;
assert(s->console_type == GRAPHIC_CONSOLE);
- if (surface && (surface->flags & QEMU_ALLOCATED_FLAG) &&
- pixman_image_get_width(surface->image) == width &&
- pixman_image_get_height(surface->image) == height) {
+ if (qemu_console_get_width(s, -1) == width &&
+ qemu_console_get_height(s, -1) == height) {
return;
}
diff --git a/util/async.c b/util/async.c
index 08d25feef5..2ea1172f3e 100644
--- a/util/async.c
+++ b/util/async.c
@@ -32,6 +32,7 @@
#include "qemu/rcu_queue.h"
#include "block/raw-aio.h"
#include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
#include "trace.h"
/***********************************************************/
@@ -675,12 +676,13 @@ void aio_context_release(AioContext *ctx)
qemu_rec_mutex_unlock(&ctx->lock);
}
-static __thread AioContext *my_aiocontext;
+QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
AioContext *qemu_get_current_aio_context(void)
{
- if (my_aiocontext) {
- return my_aiocontext;
+ AioContext *ctx = get_my_aiocontext();
+ if (ctx) {
+ return ctx;
}
if (qemu_mutex_iothread_locked()) {
/* Possibly in a vCPU thread. */
@@ -691,6 +693,6 @@ AioContext *qemu_get_current_aio_context(void)
void qemu_set_current_aio_context(AioContext *ctx)
{
- assert(!my_aiocontext);
- my_aiocontext = ctx;
+ assert(!get_my_aiocontext());
+ set_my_aiocontext(ctx);
}
diff --git a/util/atomic64.c b/util/atomic64.c
index 22983a970f..c20d071d8e 100644
--- a/util/atomic64.c
+++ b/util/atomic64.c
@@ -8,6 +8,7 @@
#include "qemu/atomic.h"
#include "qemu/thread.h"
#include "qemu/cacheinfo.h"
+#include "qemu/memalign.h"
#ifdef CONFIG_ATOMIC64
#error This file must only be compiled if !CONFIG_ATOMIC64
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 305b894a63..dd0501d9a7 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -301,6 +301,39 @@ bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t start, int64_t end,
return true;
}
+/*
+ * hbitmap_status:
+ * @hb: bitmap to query
+ * @start: first bit of the queried range; must be >= 0
+ * @count: length of the queried range; must be > 0, and @start + @count
+ *         must not exceed hb->orig_size
+ * @pnum: output; set to the number of consecutive bits, beginning at @start
+ *        and capped at @count, that share the status of the bit at @start
+ *
+ * Returns: true if the bit at @start is dirty, false if it is clear.
+ */
+bool hbitmap_status(const HBitmap *hb, int64_t start, int64_t count,
+                    int64_t *pnum)
+{
+    int64_t next_dirty, next_zero;
+
+    assert(start >= 0);
+    assert(count > 0);
+    assert(start + count <= hb->orig_size);
+
+    next_dirty = hbitmap_next_dirty(hb, start, count);
+    if (next_dirty == -1) {
+        /* No dirty bit was found in the range: all of it is clear. */
+        *pnum = count;
+        return false;
+    }
+
+    if (next_dirty > start) {
+        /* Clear run from @start up to the first dirty bit. */
+        *pnum = next_dirty - start;
+        return false;
+    }
+
+    assert(next_dirty == start);
+
+    next_zero = hbitmap_next_zero(hb, start, count);
+    if (next_zero == -1) {
+        /* No clear bit was found in the range: all of it is dirty. */
+        *pnum = count;
+        return true;
+    }
+
+    assert(next_zero > start);
+    /*
+     * The bit at @start is dirty (next_dirty == start), so the run that
+     * ends at the first clear bit is a dirty run and must be reported
+     * as such.
+     */
+    *pnum = next_zero - start;
+    return true;
+}
+
bool hbitmap_empty(const HBitmap *hb)
{
return hb->count == 0;
diff --git a/util/memalign.c b/util/memalign.c
new file mode 100644
index 0000000000..c199ae7073
--- /dev/null
+++ b/util/memalign.c
@@ -0,0 +1,92 @@
+/*
+ * memalign.c: Allocate an aligned memory region
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010-2016 Red Hat, Inc.
+ * Copyright (c) 2022 Linaro Ltd
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "qemu/memalign.h"
+#include "trace.h"
+
+/*
+ * Try to allocate @size bytes aligned to @alignment.
+ *
+ * An @alignment smaller than sizeof(void *) is raised to sizeof(void *);
+ * any other value must be a power of two (checked with g_assert()).
+ * A @size of 0 is treated as a request for 1 byte.
+ *
+ * Returns the pointer produced by the allocator selected at build time.
+ * On the posix_memalign() path a failure yields NULL with errno set to
+ * the error code posix_memalign() returned.
+ */
+void *qemu_try_memalign(size_t alignment, size_t size)
+{
+ void *ptr;
+
+ if (alignment < sizeof(void*)) {
+ alignment = sizeof(void*);
+ } else {
+ g_assert(is_power_of_2(alignment));
+ }
+
+ /*
+ * Handling of 0 allocations varies among the different
+ * platform APIs (for instance _aligned_malloc() will
+ * fail) -- ensure that we always return a valid non-NULL
+ * pointer that can be freed by qemu_vfree().
+ */
+ if (size == 0) {
+ size++;
+ }
+ /* Exactly one allocator is compiled in, chosen by the CONFIG_* macros. */
+#if defined(CONFIG_POSIX_MEMALIGN)
+ int ret;
+ ret = posix_memalign(&ptr, alignment, size);
+ /* The error code arrives as the return value; copy it into errno. */
+ if (ret != 0) {
+ errno = ret;
+ ptr = NULL;
+ }
+#elif defined(CONFIG_ALIGNED_MALLOC)
+ ptr = _aligned_malloc(size, alignment);
+#elif defined(CONFIG_VALLOC)
+ /* NOTE(review): @alignment is not passed to valloc() on this path. */
+ ptr = valloc(size);
+#elif defined(CONFIG_MEMALIGN)
+ ptr = memalign(alignment, size);
+#else
+ #error No function to allocate aligned memory available
+#endif
+ trace_qemu_memalign(alignment, size, ptr);
+ return ptr;
+}
+
+/*
+ * Allocate @size bytes aligned to @alignment via qemu_try_memalign().
+ *
+ * Never returns NULL: if the allocation fails, a diagnostic including
+ * strerror(errno) is written to stderr and the process is aborted.
+ */
+void *qemu_memalign(size_t alignment, size_t size)
+{
+    void *mem = qemu_try_memalign(alignment, size);
+
+    if (!mem) {
+        fprintf(stderr,
+                "qemu_memalign: failed to allocate %zu bytes at alignment %zu: %s\n",
+                size, alignment, strerror(errno));
+        abort();
+    }
+    return mem;
+}
+
+/*
+ * Trace and release @ptr, using the free routine that matches the
+ * allocator selected at build time.
+ */
+void qemu_vfree(void *ptr)
+{
+    trace_qemu_vfree(ptr);
+#if defined(CONFIG_POSIX_MEMALIGN) || !defined(CONFIG_ALIGNED_MALLOC)
+    free(ptr);
+#else
+    /* Only Windows _aligned_malloc needs a special free function */
+    _aligned_free(ptr);
+#endif
+}
diff --git a/util/meson.build b/util/meson.build
index 3736988b9f..f6ee74ad0c 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -51,6 +51,7 @@ util_ss.add(when: 'CONFIG_POSIX', if_true: files('drm.c'))
util_ss.add(files('guest-random.c'))
util_ss.add(files('yank.c'))
util_ss.add(files('int128.c'))
+util_ss.add(files('memalign.c'))
if have_user
util_ss.add(files('selfmap.c'))
diff --git a/util/osdep.c b/util/osdep.c
index 723cdcb004..7c4deda6fe 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -33,7 +33,6 @@
extern int madvise(char *, size_t, int);
#endif
-#include <dirent.h>
#include "qemu-common.h"
#include "qemu/cutils.h"
#include "qemu/sockets.h"
@@ -619,23 +618,3 @@ writev(int fd, const struct iovec *iov, int iov_cnt)
return readv_writev(fd, iov, iov_cnt, true);
}
#endif
-
-struct dirent *
-qemu_dirent_dup(struct dirent *dent)
-{
- size_t sz = 0;
-#if defined _DIRENT_HAVE_D_RECLEN
- /* Avoid use of strlen() if platform supports d_reclen. */
- sz = dent->d_reclen;
-#endif
- /*
- * Test sz for zero even if d_reclen is available
- * because some drivers may set d_reclen to zero.
- */
- if (sz == 0) {
- /* Fallback to the most portable way. */
- sz = offsetof(struct dirent, d_name) +
- strlen(dent->d_name) + 1;
- }
- return g_memdup(dent, sz);
-}
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index f2be7321c5..2ebfb75057 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -199,46 +199,6 @@ fail_close:
return false;
}
-void *qemu_oom_check(void *ptr)
-{
- if (ptr == NULL) {
- fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
- abort();
- }
- return ptr;
-}
-
-void *qemu_try_memalign(size_t alignment, size_t size)
-{
- void *ptr;
-
- if (alignment < sizeof(void*)) {
- alignment = sizeof(void*);
- } else {
- g_assert(is_power_of_2(alignment));
- }
-
-#if defined(CONFIG_POSIX_MEMALIGN)
- int ret;
- ret = posix_memalign(&ptr, alignment, size);
- if (ret != 0) {
- errno = ret;
- ptr = NULL;
- }
-#elif defined(CONFIG_BSD)
- ptr = valloc(size);
-#else
- ptr = memalign(alignment, size);
-#endif
- trace_qemu_memalign(alignment, size, ptr);
- return ptr;
-}
-
-void *qemu_memalign(size_t alignment, size_t size)
-{
- return qemu_oom_check(qemu_try_memalign(alignment, size));
-}
-
/* alloc shared memory pages */
void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared,
bool noreserve)
@@ -260,12 +220,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared,
return ptr;
}
-void qemu_vfree(void *ptr)
-{
- trace_qemu_vfree(ptr);
- free(ptr);
-}
-
void qemu_anon_ram_free(void *ptr, size_t size)
{
trace_qemu_anon_ram_free(ptr, size);
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index af559ef339..4b1ce0be4b 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -44,35 +44,6 @@
/* this must come after including "trace.h" */
#include <shlobj.h>
-void *qemu_oom_check(void *ptr)
-{
- if (ptr == NULL) {
- fprintf(stderr, "Failed to allocate memory: %lu\n", GetLastError());
- abort();
- }
- return ptr;
-}
-
-void *qemu_try_memalign(size_t alignment, size_t size)
-{
- void *ptr;
-
- g_assert(size != 0);
- if (alignment < sizeof(void *)) {
- alignment = sizeof(void *);
- } else {
- g_assert(is_power_of_2(alignment));
- }
- ptr = _aligned_malloc(size, alignment);
- trace_qemu_memalign(alignment, size, ptr);
- return ptr;
-}
-
-void *qemu_memalign(size_t alignment, size_t size)
-{
- return qemu_oom_check(qemu_try_memalign(alignment, size));
-}
-
static int get_allocation_granularity(void)
{
SYSTEM_INFO system_info;
@@ -104,12 +75,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
return ptr;
}
-void qemu_vfree(void *ptr)
-{
- trace_qemu_vfree(ptr);
- _aligned_free(ptr);
-}
-
void qemu_anon_ram_free(void *ptr, size_t size)
{
trace_qemu_anon_ram_free(ptr, size);
diff --git a/util/qht.c b/util/qht.c
index 079605121b..065fc501f4 100644
--- a/util/qht.c
+++ b/util/qht.c
@@ -69,6 +69,7 @@
#include "qemu/qht.h"
#include "qemu/atomic.h"
#include "qemu/rcu.h"
+#include "qemu/memalign.h"
//#define QHT_DEBUG
diff --git a/util/rcu.c b/util/rcu.c
index c91da9f137..b6d6c71cff 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -65,7 +65,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr)
/* Written to only by each individual reader. Read by both the reader and the
* writers.
*/
-__thread struct rcu_reader_data rcu_reader;
+QEMU_DEFINE_CO_TLS(struct rcu_reader_data, rcu_reader)
/* Protected by rcu_registry_lock. */
typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
@@ -355,23 +355,23 @@ void drain_call_rcu(void)
void rcu_register_thread(void)
{
- assert(rcu_reader.ctr == 0);
+ assert(get_ptr_rcu_reader()->ctr == 0);
qemu_mutex_lock(&rcu_registry_lock);
- QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
+ QLIST_INSERT_HEAD(&registry, get_ptr_rcu_reader(), node);
qemu_mutex_unlock(&rcu_registry_lock);
}
void rcu_unregister_thread(void)
{
qemu_mutex_lock(&rcu_registry_lock);
- QLIST_REMOVE(&rcu_reader, node);
+ QLIST_REMOVE(get_ptr_rcu_reader(), node);
qemu_mutex_unlock(&rcu_registry_lock);
}
void rcu_add_force_rcu_notifier(Notifier *n)
{
qemu_mutex_lock(&rcu_registry_lock);
- notifier_list_add(&rcu_reader.force_rcu, n);
+ notifier_list_add(&get_ptr_rcu_reader()->force_rcu, n);
qemu_mutex_unlock(&rcu_registry_lock);
}