diff options
367 files changed, 12087 insertions, 5534 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 83a9f79c1c..38d1ac8803 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2514,9 +2514,12 @@ F: block/stream.c F: block/mirror.c F: qapi/job.json F: block/block-copy.c -F: include/block/block-copy.c +F: include/block/block-copy.h +F: block/reqlist.c +F: include/block/reqlist.h F: block/copy-before-write.h F: block/copy-before-write.c +F: block/snapshot-access.c F: include/block/aio_task.h F: block/aio_task.c F: util/qemu-co-shared-resource.c @@ -3607,7 +3610,8 @@ FreeBSD Hosted Continuous Integration M: Ed Maste <emaste@freebsd.org> M: Li-Wen Hsu <lwhsu@freebsd.org> S: Maintained -F: .cirrus.yml +F: .gitlab-ci.d/cirrus/freebsd* +F: tests/vm/freebsd W: https://cirrus-ci.com/github/qemu/qemu Windows Hosted Continuous Integration diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c index 7516c67a3f..c4244a23c6 100644 --- a/accel/kvm/kvm-accel-ops.c +++ b/accel/kvm/kvm-accel-ops.c @@ -74,11 +74,23 @@ static void kvm_start_vcpu_thread(CPUState *cpu) cpu, QEMU_THREAD_JOINABLE); } +static bool kvm_vcpu_thread_is_idle(CPUState *cpu) +{ + return !kvm_halt_in_kernel(); +} + +static bool kvm_cpus_are_resettable(void) +{ + return !kvm_enabled() || kvm_cpu_check_are_resettable(); +} + static void kvm_accel_ops_class_init(ObjectClass *oc, void *data) { AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); ops->create_vcpu_thread = kvm_start_vcpu_thread; + ops->cpu_thread_is_idle = kvm_vcpu_thread_is_idle; + ops->cpus_are_resettable = kvm_cpus_are_resettable; ops->synchronize_post_reset = kvm_cpu_synchronize_post_reset; ops->synchronize_post_init = kvm_cpu_synchronize_post_init; ops->synchronize_state = kvm_cpu_synchronize_state; diff --git a/accel/meson.build b/accel/meson.build index dfd808d2c8..b9a963cf80 100644 --- a/accel/meson.build +++ b/accel/meson.build @@ -2,12 +2,14 @@ specific_ss.add(files('accel-common.c')) softmmu_ss.add(files('accel-softmmu.c')) user_ss.add(files('accel-user.c')) -subdir('hvf') -subdir('qtest') 
-subdir('kvm') subdir('tcg') -subdir('xen') -subdir('stubs') +if have_system + subdir('hvf') + subdir('qtest') + subdir('kvm') + subdir('xen') + subdir('stubs') +endif dummy_ss = ss.source_set() dummy_ss.add(files( diff --git a/accel/qtest/qtest.c b/accel/qtest/qtest.c index 7e6b8110d5..f6056ac836 100644 --- a/accel/qtest/qtest.c +++ b/accel/qtest/qtest.c @@ -20,7 +20,6 @@ #include "qemu/accel.h" #include "sysemu/qtest.h" #include "sysemu/cpus.h" -#include "sysemu/cpu-timers.h" #include "qemu/guest-random.h" #include "qemu/main-loop.h" #include "hw/core/cpu.h" diff --git a/accel/stubs/hax-stub.c b/accel/stubs/hax-stub.c index 49077f88e3..2fe31aaa9a 100644 --- a/accel/stubs/hax-stub.c +++ b/accel/stubs/hax-stub.c @@ -16,6 +16,8 @@ #include "qemu/osdep.h" #include "sysemu/hax.h" +bool hax_allowed; + int hax_sync_vcpus(void) { return 0; diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 5319573e00..7e0fb884b9 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -12,10 +12,7 @@ #include "qemu/osdep.h" #include "sysemu/kvm.h" - -#ifndef CONFIG_USER_ONLY #include "hw/pci/msi.h" -#endif KVMState *kvm_state; bool kvm_kernel_irqchip; @@ -80,7 +77,6 @@ int kvm_on_sigbus(int code, void *addr) return 1; } -#ifndef CONFIG_USER_ONLY int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) { return -ENOSYS; @@ -152,4 +148,3 @@ bool kvm_dirty_ring_enabled(void) { return false; } -#endif diff --git a/accel/stubs/meson.build b/accel/stubs/meson.build index 12dd1539af..0249b9258f 100644 --- a/accel/stubs/meson.build +++ b/accel/stubs/meson.build @@ -1,4 +1,7 @@ -specific_ss.add(when: 'CONFIG_HAX', if_false: files('hax-stub.c')) -specific_ss.add(when: 'CONFIG_XEN', if_false: files('xen-stub.c')) -specific_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c')) -specific_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c')) +sysemu_stubs_ss = ss.source_set() +sysemu_stubs_ss.add(when: 'CONFIG_HAX', if_false: files('hax-stub.c')) 
+sysemu_stubs_ss.add(when: 'CONFIG_XEN', if_false: files('xen-stub.c')) +sysemu_stubs_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c')) +sysemu_stubs_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c')) + +specific_ss.add_all(when: ['CONFIG_SOFTMMU'], if_true: sysemu_stubs_ss) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index c68270f794..c997c2e8e0 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -422,7 +422,7 @@ static void cpu_exec_exit(CPUState *cpu) void cpu_exec_step_atomic(CPUState *cpu) { - CPUArchState *env = (CPUArchState *)cpu->env_ptr; + CPUArchState *env = cpu->env_ptr; TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags, cflags; @@ -532,7 +532,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, struct tb_desc desc; uint32_t h; - desc.env = (CPUArchState *)cpu->env_ptr; + desc.env = cpu->env_ptr; desc.cs_base = cs_base; desc.flags = flags; desc.cflags = cflags; diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c index ea42d1d51b..bdaf2c943b 100644 --- a/accel/tcg/tcg-accel-ops-icount.c +++ b/accel/tcg/tcg-accel-ops-icount.c @@ -27,6 +27,7 @@ #include "qemu-common.h" #include "sysemu/tcg.h" #include "sysemu/replay.h" +#include "sysemu/cpu-timers.h" #include "qemu/main-loop.h" #include "qemu/guest-random.h" #include "exec/exec-all.h" diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index 29632bd4c0..dc421c8fd7 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -27,6 +27,7 @@ #include "qemu-common.h" #include "sysemu/tcg.h" #include "sysemu/replay.h" +#include "sysemu/cpu-timers.h" #include "qemu/main-loop.h" #include "qemu/notify.h" #include "qemu/guest-random.h" diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index bf59f53dbc..a805fb6bdd 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -27,6 +27,7 @@ #include "qemu-common.h" #include 
"sysemu/tcg.h" #include "sysemu/replay.h" +#include "sysemu/cpu-timers.h" #include "qemu/main-loop.h" #include "qemu/notify.h" #include "qemu/guest-random.h" diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index 1a8e8390bd..ea7dcad674 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -29,6 +29,7 @@ #include "qemu-common.h" #include "sysemu/tcg.h" #include "sysemu/replay.h" +#include "sysemu/cpu-timers.h" #include "qemu/main-loop.h" #include "qemu/guest-random.h" #include "exec/exec-all.h" diff --git a/audio/alsaaudio.c b/audio/alsaaudio.c index 2b9789e647..b04716a6cc 100644 --- a/audio/alsaaudio.c +++ b/audio/alsaaudio.c @@ -916,6 +916,7 @@ static struct audio_pcm_ops alsa_pcm_ops = { .init_out = alsa_init_out, .fini_out = alsa_fini_out, .write = alsa_write, + .buffer_get_free = audio_generic_buffer_get_free, .run_buffer_out = audio_generic_run_buffer_out, .enable_out = alsa_enable_out, diff --git a/audio/audio.c b/audio/audio.c index dc28685d22..a88572e713 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -548,65 +548,45 @@ static size_t audio_pcm_hw_get_live_in(HWVoiceIn *hw) return live; } -static void audio_pcm_hw_clip_out(HWVoiceOut *hw, void *pcm_buf, size_t len) +static size_t audio_pcm_hw_conv_in(HWVoiceIn *hw, void *pcm_buf, size_t samples) { - size_t clipped = 0; - size_t pos = hw->mix_buf->pos; - - while (len) { - st_sample *src = hw->mix_buf->samples + pos; - uint8_t *dst = advance(pcm_buf, clipped * hw->info.bytes_per_frame); - size_t samples_till_end_of_buf = hw->mix_buf->size - pos; - size_t samples_to_clip = MIN(len, samples_till_end_of_buf); + size_t conv = 0; + STSampleBuffer *conv_buf = hw->conv_buf; - hw->clip(dst, src, samples_to_clip); + while (samples) { + uint8_t *src = advance(pcm_buf, conv * hw->info.bytes_per_frame); + size_t proc = MIN(samples, conv_buf->size - conv_buf->pos); - pos = (pos + samples_to_clip) % hw->mix_buf->size; - len -= samples_to_clip; - clipped += samples_to_clip; + 
hw->conv(conv_buf->samples + conv_buf->pos, src, proc); + conv_buf->pos = (conv_buf->pos + proc) % conv_buf->size; + samples -= proc; + conv += proc; } + + return conv; } /* * Soft voice (capture) */ -static size_t audio_pcm_sw_get_rpos_in(SWVoiceIn *sw) -{ - HWVoiceIn *hw = sw->hw; - ssize_t live = hw->total_samples_captured - sw->total_hw_samples_acquired; - ssize_t rpos; - - if (audio_bug(__func__, live < 0 || live > hw->conv_buf->size)) { - dolog("live=%zu hw->conv_buf->size=%zu\n", live, hw->conv_buf->size); - return 0; - } - - rpos = hw->conv_buf->pos - live; - if (rpos >= 0) { - return rpos; - } else { - return hw->conv_buf->size + rpos; - } -} - static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, size_t size) { HWVoiceIn *hw = sw->hw; size_t samples, live, ret = 0, swlim, isamp, osamp, rpos, total = 0; struct st_sample *src, *dst = sw->buf; - rpos = audio_pcm_sw_get_rpos_in(sw) % hw->conv_buf->size; - live = hw->total_samples_captured - sw->total_hw_samples_acquired; + if (!live) { + return 0; + } if (audio_bug(__func__, live > hw->conv_buf->size)) { dolog("live_in=%zu hw->conv_buf->size=%zu\n", live, hw->conv_buf->size); return 0; } + rpos = audio_ring_posb(hw->conv_buf->pos, live, hw->conv_buf->size); + samples = size / sw->info.bytes_per_frame; - if (!live) { - return 0; - } swlim = (live * sw->ratio) >> 32; swlim = MIN (swlim, samples); @@ -632,7 +612,7 @@ static size_t audio_pcm_sw_read(SWVoiceIn *sw, void *buf, size_t size) total += isamp; } - if (hw->pcm_ops && !hw->pcm_ops->volume_in) { + if (!hw->pcm_ops->volume_in) { mixeng_volume (sw->buf, ret, &sw->vol); } @@ -683,12 +663,38 @@ static size_t audio_pcm_hw_get_live_out (HWVoiceOut *hw, int *nb_live) return 0; } +static size_t audio_pcm_hw_get_free(HWVoiceOut *hw) +{ + return (hw->pcm_ops->buffer_get_free ? 
hw->pcm_ops->buffer_get_free(hw) : + INT_MAX) / hw->info.bytes_per_frame; +} + +static void audio_pcm_hw_clip_out(HWVoiceOut *hw, void *pcm_buf, size_t len) +{ + size_t clipped = 0; + size_t pos = hw->mix_buf->pos; + + while (len) { + st_sample *src = hw->mix_buf->samples + pos; + uint8_t *dst = advance(pcm_buf, clipped * hw->info.bytes_per_frame); + size_t samples_till_end_of_buf = hw->mix_buf->size - pos; + size_t samples_to_clip = MIN(len, samples_till_end_of_buf); + + hw->clip(dst, src, samples_to_clip); + + pos = (pos + samples_to_clip) % hw->mix_buf->size; + len -= samples_to_clip; + clipped += samples_to_clip; + } +} + /* * Soft voice (playback) */ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t size) { - size_t hwsamples, samples, isamp, osamp, wpos, live, dead, left, swlim, blck; + size_t hwsamples, samples, isamp, osamp, wpos, live, dead, left, blck; + size_t hw_free; size_t ret = 0, pos = 0, total = 0; if (!sw) { @@ -711,27 +717,28 @@ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t size) } wpos = (sw->hw->mix_buf->pos + live) % hwsamples; - samples = size / sw->info.bytes_per_frame; dead = hwsamples - live; - swlim = ((int64_t) dead << 32) / sw->ratio; - swlim = MIN (swlim, samples); - if (swlim) { - sw->conv (sw->buf, buf, swlim); + hw_free = audio_pcm_hw_get_free(sw->hw); + hw_free = hw_free > live ? 
hw_free - live : 0; + samples = ((int64_t)MIN(dead, hw_free) << 32) / sw->ratio; + samples = MIN(samples, size / sw->info.bytes_per_frame); + if (samples) { + sw->conv(sw->buf, buf, samples); - if (sw->hw->pcm_ops && !sw->hw->pcm_ops->volume_out) { - mixeng_volume (sw->buf, swlim, &sw->vol); + if (!sw->hw->pcm_ops->volume_out) { + mixeng_volume(sw->buf, samples, &sw->vol); } } - while (swlim) { + while (samples) { dead = hwsamples - live; left = hwsamples - wpos; blck = MIN (dead, left); if (!blck) { break; } - isamp = swlim; + isamp = samples; osamp = blck; st_rate_flow_mix ( sw->rate, @@ -741,7 +748,7 @@ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t size) &osamp ); ret += isamp; - swlim -= isamp; + samples -= isamp; pos += isamp; live += osamp; wpos = (wpos + osamp) % hwsamples; @@ -1003,6 +1010,11 @@ static size_t audio_get_avail (SWVoiceIn *sw) return (((int64_t) live << 32) / sw->ratio) * sw->info.bytes_per_frame; } +static size_t audio_sw_bytes_free(SWVoiceOut *sw, size_t free) +{ + return (((int64_t)free << 32) / sw->ratio) * sw->info.bytes_per_frame; +} + static size_t audio_get_free(SWVoiceOut *sw) { size_t live, dead; @@ -1022,13 +1034,11 @@ static size_t audio_get_free(SWVoiceOut *sw) dead = sw->hw->mix_buf->size - live; #ifdef DEBUG_OUT - dolog ("%s: get_free live %zu dead %zu ret %" PRId64 "\n", - SW_NAME (sw), - live, dead, (((int64_t) dead << 32) / sw->ratio) * - sw->info.bytes_per_frame); + dolog("%s: get_free live %zu dead %zu sw_bytes %zu\n", + SW_NAME(sw), live, dead, audio_sw_bytes_free(sw, dead)); #endif - return (((int64_t) dead << 32) / sw->ratio) * sw->info.bytes_per_frame; + return dead; } static void audio_capture_mix_and_clear(HWVoiceOut *hw, size_t rpos, @@ -1132,9 +1142,27 @@ static void audio_run_out (AudioState *s) } while ((hw = audio_pcm_hw_find_any_enabled_out(s, hw))) { - size_t played, live, prev_rpos, free; + size_t played, live, prev_rpos; + size_t hw_free = audio_pcm_hw_get_free(hw); int nb_live; + for (sw 
= hw->sw_head.lh_first; sw; sw = sw->entries.le_next) { + if (sw->active) { + size_t sw_free = audio_get_free(sw); + size_t free; + + if (hw_free > sw->total_hw_samples_mixed) { + free = audio_sw_bytes_free(sw, + MIN(sw_free, hw_free - sw->total_hw_samples_mixed)); + } else { + free = 0; + } + if (free > 0) { + sw->callback.fn(sw->callback.opaque, free); + } + } + } + live = audio_pcm_hw_get_live_out (hw, &nb_live); if (!nb_live) { live = 0; @@ -1163,14 +1191,6 @@ static void audio_run_out (AudioState *s) } if (!live) { - for (sw = hw->sw_head.lh_first; sw; sw = sw->entries.le_next) { - if (sw->active) { - free = audio_get_free (sw); - if (free > 0) { - sw->callback.fn (sw->callback.opaque, free); - } - } - } if (hw->pcm_ops->run_buffer_out) { hw->pcm_ops->run_buffer_out(hw); } @@ -1211,13 +1231,6 @@ static void audio_run_out (AudioState *s) if (!sw->total_hw_samples_mixed) { sw->empty = 1; } - - if (sw->active) { - free = audio_get_free (sw); - if (free > 0) { - sw->callback.fn (sw->callback.opaque, free); - } - } } } } @@ -1225,7 +1238,6 @@ static void audio_run_out (AudioState *s) static size_t audio_pcm_hw_run_in(HWVoiceIn *hw, size_t samples) { size_t conv = 0; - STSampleBuffer *conv_buf = hw->conv_buf; if (hw->pcm_ops->run_buffer_in) { hw->pcm_ops->run_buffer_in(hw); @@ -1241,11 +1253,7 @@ static size_t audio_pcm_hw_run_in(HWVoiceIn *hw, size_t samples) break; } - proc = MIN(size / hw->info.bytes_per_frame, - conv_buf->size - conv_buf->pos); - - hw->conv(conv_buf->samples + conv_buf->pos, buf, proc); - conv_buf->pos = (conv_buf->pos + proc) % conv_buf->size; + proc = audio_pcm_hw_conv_in(hw, buf, size / hw->info.bytes_per_frame); samples -= proc; conv += proc; @@ -1394,12 +1402,10 @@ void audio_generic_run_buffer_in(HWVoiceIn *hw) void *audio_generic_get_buffer_in(HWVoiceIn *hw, size_t *size) { - ssize_t start = (ssize_t)hw->pos_emul - hw->pending_emul; + size_t start; - if (start < 0) { - start += hw->size_emul; - } - assert(start >= 0 && start < 
hw->size_emul); + start = audio_ring_posb(hw->pos_emul, hw->pending_emul, hw->size_emul); + assert(start < hw->size_emul); *size = MIN(*size, hw->pending_emul); *size = MIN(*size, hw->size_emul - start); @@ -1412,16 +1418,22 @@ void audio_generic_put_buffer_in(HWVoiceIn *hw, void *buf, size_t size) hw->pending_emul -= size; } +size_t audio_generic_buffer_get_free(HWVoiceOut *hw) +{ + if (hw->buf_emul) { + return hw->size_emul - hw->pending_emul; + } else { + return hw->samples * hw->info.bytes_per_frame; + } +} + void audio_generic_run_buffer_out(HWVoiceOut *hw) { while (hw->pending_emul) { - size_t write_len, written; - ssize_t start = ((ssize_t) hw->pos_emul) - hw->pending_emul; + size_t write_len, written, start; - if (start < 0) { - start += hw->size_emul; - } - assert(start >= 0 && start < hw->size_emul); + start = audio_ring_posb(hw->pos_emul, hw->pending_emul, hw->size_emul); + assert(start < hw->size_emul); write_len = MIN(hw->pending_emul, hw->size_emul - start); @@ -1462,6 +1474,12 @@ size_t audio_generic_write(HWVoiceOut *hw, void *buf, size_t size) { size_t total = 0; + if (hw->pcm_ops->buffer_get_free) { + size_t free = hw->pcm_ops->buffer_get_free(hw); + + size = MIN(size, free); + } + while (total < size) { size_t dst_size = size - total; size_t copy_size, proc; @@ -1821,6 +1839,7 @@ void AUD_remove_card (QEMUSoundCard *card) g_free (card->name); } +static struct audio_pcm_ops capture_pcm_ops; CaptureVoiceOut *AUD_add_capture( AudioState *s, @@ -1866,6 +1885,7 @@ CaptureVoiceOut *AUD_add_capture( hw = &cap->hw; hw->s = s; + hw->pcm_ops = &capture_pcm_ops; QLIST_INIT (&hw->sw_head); QLIST_INIT (&cap->cb_head); diff --git a/audio/audio_int.h b/audio/audio_int.h index 428a091d05..2a6914d2aa 100644 --- a/audio/audio_int.h +++ b/audio/audio_int.h @@ -162,9 +162,13 @@ struct audio_pcm_ops { size_t (*write) (HWVoiceOut *hw, void *buf, size_t size); void (*run_buffer_out)(HWVoiceOut *hw); /* + * Get the free output buffer size. This is an upper limit. 
The size + * returned by function get_buffer_out may be smaller. + */ + size_t (*buffer_get_free)(HWVoiceOut *hw); + /* * get a buffer that after later can be passed to put_buffer_out; optional * returns the buffer, and writes it's size to size (in bytes) - * this is unrelated to the above buffer_size_out function */ void *(*get_buffer_out)(HWVoiceOut *hw, size_t *size); /* @@ -190,6 +194,7 @@ void audio_generic_run_buffer_in(HWVoiceIn *hw); void *audio_generic_get_buffer_in(HWVoiceIn *hw, size_t *size); void audio_generic_put_buffer_in(HWVoiceIn *hw, void *buf, size_t size); void audio_generic_run_buffer_out(HWVoiceOut *hw); +size_t audio_generic_buffer_get_free(HWVoiceOut *hw); void *audio_generic_get_buffer_out(HWVoiceOut *hw, size_t *size); size_t audio_generic_put_buffer_out(HWVoiceOut *hw, void *buf, size_t size); size_t audio_generic_write(HWVoiceOut *hw, void *buf, size_t size); @@ -266,6 +271,19 @@ static inline size_t audio_ring_dist(size_t dst, size_t src, size_t len) return (dst >= src) ? (dst - src) : (len - src + dst); } +/** + * audio_ring_posb() - returns new position in ringbuffer in backward + * direction at given distance + * + * @pos: current position in ringbuffer + * @dist: distance in ringbuffer to walk in reverse direction + * @len: size of ringbuffer + */ +static inline size_t audio_ring_posb(size_t pos, size_t dist, size_t len) +{ + return pos >= dist ? pos - dist : len - dist + pos; +} + #define dolog(fmt, ...) 
AUD_log(AUDIO_CAP, fmt, ## __VA_ARGS__) #ifdef DEBUG diff --git a/audio/coreaudio.c b/audio/coreaudio.c index d8a21d3e50..0f19d0ce01 100644 --- a/audio/coreaudio.c +++ b/audio/coreaudio.c @@ -283,6 +283,7 @@ static int coreaudio_buf_unlock (coreaudioVoiceOut *core, const char *fn_name) coreaudio_buf_unlock(core, "coreaudio_" #name); \ return ret; \ } +COREAUDIO_WRAPPER_FUNC(buffer_get_free, size_t, (HWVoiceOut *hw), (hw)) COREAUDIO_WRAPPER_FUNC(get_buffer_out, void *, (HWVoiceOut *hw, size_t *size), (hw, size)) COREAUDIO_WRAPPER_FUNC(put_buffer_out, size_t, @@ -333,12 +334,10 @@ static OSStatus audioDeviceIOProc( len = frameCount * hw->info.bytes_per_frame; while (len) { - size_t write_len; - ssize_t start = ((ssize_t) hw->pos_emul) - hw->pending_emul; - if (start < 0) { - start += hw->size_emul; - } - assert(start >= 0 && start < hw->size_emul); + size_t write_len, start; + + start = audio_ring_posb(hw->pos_emul, hw->pending_emul, hw->size_emul); + assert(start < hw->size_emul); write_len = MIN(MIN(hw->pending_emul, len), hw->size_emul - start); @@ -604,6 +603,8 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as, coreaudio_playback_logerr(status, "Could not remove voice property change listener\n"); } + + return -1; } return 0; @@ -654,6 +655,8 @@ static struct audio_pcm_ops coreaudio_pcm_ops = { .fini_out = coreaudio_fini_out, /* wrapper for audio_generic_write */ .write = coreaudio_write, + /* wrapper for audio_generic_buffer_get_free */ + .buffer_get_free = coreaudio_buffer_get_free, /* wrapper for audio_generic_get_buffer_out */ .get_buffer_out = coreaudio_get_buffer_out, /* wrapper for audio_generic_put_buffer_out */ diff --git a/audio/dsoundaudio.c b/audio/dsoundaudio.c index 3dd2c4d4a6..231f3e65b3 100644 --- a/audio/dsoundaudio.c +++ b/audio/dsoundaudio.c @@ -427,22 +427,18 @@ static void dsound_enable_out(HWVoiceOut *hw, bool enable) } } -static void *dsound_get_buffer_out(HWVoiceOut *hw, size_t *size) +static size_t 
dsound_buffer_get_free(HWVoiceOut *hw) { DSoundVoiceOut *ds = (DSoundVoiceOut *) hw; LPDIRECTSOUNDBUFFER dsb = ds->dsound_buffer; HRESULT hr; - DWORD ppos, wpos, act_size; - size_t req_size; - int err; - void *ret; + DWORD ppos, wpos; hr = IDirectSoundBuffer_GetCurrentPosition( dsb, &ppos, ds->first_time ? &wpos : NULL); if (FAILED(hr)) { dsound_logerr(hr, "Could not get playback buffer position\n"); - *size = 0; - return NULL; + return 0; } if (ds->first_time) { @@ -450,13 +446,20 @@ static void *dsound_get_buffer_out(HWVoiceOut *hw, size_t *size) ds->first_time = false; } - req_size = audio_ring_dist(ppos, hw->pos_emul, hw->size_emul); - req_size = MIN(req_size, hw->size_emul - hw->pos_emul); + return audio_ring_dist(ppos, hw->pos_emul, hw->size_emul); +} - if (req_size == 0) { - *size = 0; - return NULL; - } +static void *dsound_get_buffer_out(HWVoiceOut *hw, size_t *size) +{ + DSoundVoiceOut *ds = (DSoundVoiceOut *)hw; + LPDIRECTSOUNDBUFFER dsb = ds->dsound_buffer; + DWORD act_size; + size_t req_size; + int err; + void *ret; + + req_size = MIN(*size, hw->size_emul - hw->pos_emul); + assert(req_size > 0); err = dsound_lock_out(dsb, &hw->info, hw->pos_emul, req_size, &ret, NULL, &act_size, NULL, false, ds->s); @@ -699,6 +702,7 @@ static struct audio_pcm_ops dsound_pcm_ops = { .init_out = dsound_init_out, .fini_out = dsound_fini_out, .write = audio_generic_write, + .buffer_get_free = dsound_buffer_get_free, .get_buffer_out = dsound_get_buffer_out, .put_buffer_out = dsound_put_buffer_out, .enable_out = dsound_enable_out, diff --git a/audio/jackaudio.c b/audio/jackaudio.c index 317009e936..bf757250b5 100644 --- a/audio/jackaudio.c +++ b/audio/jackaudio.c @@ -483,8 +483,8 @@ static int qjack_client_init(QJackClient *c) c->buffersize = 512; } - /* create a 2 period buffer */ - qjack_buffer_create(&c->fifo, c->nchannels, c->buffersize * 2); + /* create a 3 period buffer */ + qjack_buffer_create(&c->fifo, c->nchannels, c->buffersize * 3); qjack_client_connect_ports(c); 
c->state = QJACK_STATE_RUNNING; @@ -652,6 +652,7 @@ static struct audio_pcm_ops jack_pcm_ops = { .init_out = qjack_init_out, .fini_out = qjack_fini_out, .write = qjack_write, + .buffer_get_free = audio_generic_buffer_get_free, .run_buffer_out = audio_generic_run_buffer_out, .enable_out = qjack_enable_out, diff --git a/audio/noaudio.c b/audio/noaudio.c index aac87dbc93..84a6bfbb1c 100644 --- a/audio/noaudio.c +++ b/audio/noaudio.c @@ -118,6 +118,7 @@ static struct audio_pcm_ops no_pcm_ops = { .init_out = no_init_out, .fini_out = no_fini_out, .write = no_write, + .buffer_get_free = audio_generic_buffer_get_free, .run_buffer_out = audio_generic_run_buffer_out, .enable_out = no_enable_out, diff --git a/audio/ossaudio.c b/audio/ossaudio.c index 60eff66424..da9c232222 100644 --- a/audio/ossaudio.c +++ b/audio/ossaudio.c @@ -389,11 +389,23 @@ static void oss_run_buffer_out(HWVoiceOut *hw) } } +static size_t oss_buffer_get_free(HWVoiceOut *hw) +{ + OSSVoiceOut *oss = (OSSVoiceOut *)hw; + + if (oss->mmapped) { + return oss_get_available_bytes(oss); + } else { + return audio_generic_buffer_get_free(hw); + } +} + static void *oss_get_buffer_out(HWVoiceOut *hw, size_t *size) { - OSSVoiceOut *oss = (OSSVoiceOut *) hw; + OSSVoiceOut *oss = (OSSVoiceOut *)hw; + if (oss->mmapped) { - *size = MIN(oss_get_available_bytes(oss), hw->size_emul - hw->pos_emul); + *size = hw->size_emul - hw->pos_emul; return hw->buf_emul + hw->pos_emul; } else { return audio_generic_get_buffer_out(hw, size); @@ -750,6 +762,7 @@ static struct audio_pcm_ops oss_pcm_ops = { .init_out = oss_init_out, .fini_out = oss_fini_out, .write = oss_write, + .buffer_get_free = oss_buffer_get_free, .run_buffer_out = oss_run_buffer_out, .get_buffer_out = oss_get_buffer_out, .put_buffer_out = oss_put_buffer_out, diff --git a/audio/paaudio.c b/audio/paaudio.c index 75401d5391..a53ed85e0b 100644 --- a/audio/paaudio.c +++ b/audio/paaudio.c @@ -201,13 +201,11 @@ unlock_and_fail: return 0; } -static void 
*qpa_get_buffer_out(HWVoiceOut *hw, size_t *size) +static size_t qpa_buffer_get_free(HWVoiceOut *hw) { - PAVoiceOut *p = (PAVoiceOut *) hw; + PAVoiceOut *p = (PAVoiceOut *)hw; PAConnection *c = p->g->conn; - void *ret; size_t l; - int r; pa_threaded_mainloop_lock(c->mainloop); @@ -216,7 +214,6 @@ static void *qpa_get_buffer_out(HWVoiceOut *hw, size_t *size) if (pa_stream_get_state(p->stream) != PA_STREAM_READY) { /* wait for stream to become ready */ l = 0; - ret = NULL; goto unlock; } @@ -224,16 +221,33 @@ static void *qpa_get_buffer_out(HWVoiceOut *hw, size_t *size) CHECK_SUCCESS_GOTO(c, l != (size_t) -1, unlock_and_fail, "pa_stream_writable_size failed\n"); +unlock: + pa_threaded_mainloop_unlock(c->mainloop); + return l; + +unlock_and_fail: + pa_threaded_mainloop_unlock(c->mainloop); + return 0; +} + +static void *qpa_get_buffer_out(HWVoiceOut *hw, size_t *size) +{ + PAVoiceOut *p = (PAVoiceOut *)hw; + PAConnection *c = p->g->conn; + void *ret; + int r; + + pa_threaded_mainloop_lock(c->mainloop); + + CHECK_DEAD_GOTO(c, p->stream, unlock_and_fail, + "pa_threaded_mainloop_lock failed\n"); + *size = -1; r = pa_stream_begin_write(p->stream, &ret, size); CHECK_SUCCESS_GOTO(c, r >= 0, unlock_and_fail, "pa_stream_begin_write failed\n"); -unlock: pa_threaded_mainloop_unlock(c->mainloop); - if (*size > l) { - *size = l; - } return ret; unlock_and_fail: @@ -535,11 +549,8 @@ static int qpa_init_out(HWVoiceOut *hw, struct audsettings *as, } audio_pcm_init_info (&hw->info, &obt_as); - /* - * This is wrong. hw->samples counts in frames. hw->samples will be - * number of channels times larger than expected. - */ - hw->samples = audio_buffer_samples( + /* hw->samples counts in frames */ + hw->samples = audio_buffer_frames( qapi_AudiodevPaPerDirectionOptions_base(ppdo), &obt_as, 46440); return 0; @@ -587,11 +598,8 @@ static int qpa_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque) } audio_pcm_init_info (&hw->info, &obt_as); - /* - * This is wrong. 
hw->samples counts in frames. hw->samples will be - * number of channels times larger than expected. - */ - hw->samples = audio_buffer_samples( + /* hw->samples counts in frames */ + hw->samples = audio_buffer_frames( qapi_AudiodevPaPerDirectionOptions_base(ppdo), &obt_as, 46440); return 0; @@ -744,7 +752,7 @@ static int qpa_validate_per_direction_opts(Audiodev *dev, { if (!pdo->has_latency) { pdo->has_latency = true; - pdo->latency = 15000; + pdo->latency = 46440; } return 1; } @@ -901,6 +909,7 @@ static struct audio_pcm_ops qpa_pcm_ops = { .init_out = qpa_init_out, .fini_out = qpa_fini_out, .write = qpa_write, + .buffer_get_free = qpa_buffer_get_free, .get_buffer_out = qpa_get_buffer_out, .put_buffer_out = qpa_put_buffer_out, .volume_out = qpa_volume_out, diff --git a/audio/sdlaudio.c b/audio/sdlaudio.c index c68c62a3e4..797b47bbdd 100644 --- a/audio/sdlaudio.c +++ b/audio/sdlaudio.c @@ -224,12 +224,11 @@ static void sdl_callback_out(void *opaque, Uint8 *buf, int len) /* dolog("callback_out: len=%d avail=%zu\n", len, hw->pending_emul); */ while (hw->pending_emul && len) { - size_t write_len; - ssize_t start = (ssize_t)hw->pos_emul - hw->pending_emul; - if (start < 0) { - start += hw->size_emul; - } - assert(start >= 0 && start < hw->size_emul); + size_t write_len, start; + + start = audio_ring_posb(hw->pos_emul, hw->pending_emul, + hw->size_emul); + assert(start < hw->size_emul); write_len = MIN(MIN(hw->pending_emul, len), hw->size_emul - start); @@ -310,6 +309,7 @@ static void sdl_callback_in(void *opaque, Uint8 *buf, int len) SDL_UnlockAudioDevice(sdl->devid); \ } +SDL_WRAPPER_FUNC(buffer_get_free, size_t, (HWVoiceOut *hw), (hw), Out) SDL_WRAPPER_FUNC(get_buffer_out, void *, (HWVoiceOut *hw, size_t *size), (hw, size), Out) SDL_WRAPPER_FUNC(put_buffer_out, size_t, @@ -347,11 +347,8 @@ static int sdl_init_out(HWVoiceOut *hw, struct audsettings *as, req.freq = as->freq; req.format = aud_to_sdlfmt (as->fmt); req.channels = as->nchannels; - /* - * This is wrong. 
SDL samples are QEMU frames. The buffer size will be - * the requested buffer size multiplied by the number of channels. - */ - req.samples = audio_buffer_samples( + /* SDL samples are QEMU frames */ + req.samples = audio_buffer_frames( qapi_AudiodevSdlPerDirectionOptions_base(spdo), as, 11610); req.callback = sdl_callback_out; req.userdata = sdl; @@ -472,6 +469,8 @@ static struct audio_pcm_ops sdl_pcm_ops = { .fini_out = sdl_fini_out, /* wrapper for audio_generic_write */ .write = sdl_write, + /* wrapper for audio_generic_buffer_get_free */ + .buffer_get_free = sdl_buffer_get_free, /* wrapper for audio_generic_get_buffer_out */ .get_buffer_out = sdl_get_buffer_out, /* wrapper for audio_generic_put_buffer_out */ diff --git a/audio/wavaudio.c b/audio/wavaudio.c index 20e6853f85..ac666335c7 100644 --- a/audio/wavaudio.c +++ b/audio/wavaudio.c @@ -197,6 +197,7 @@ static struct audio_pcm_ops wav_pcm_ops = { .init_out = wav_init_out, .fini_out = wav_fini_out, .write = wav_write_out, + .buffer_get_free = audio_generic_buffer_get_free, .run_buffer_out = audio_generic_run_buffer_out, .enable_out = wav_enable_out, }; @@ -67,12 +67,15 @@ #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ +/* Protected by BQL */ static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); +/* Protected by BQL */ static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states = QTAILQ_HEAD_INITIALIZER(all_bdrv_states); +/* Protected by BQL */ static QLIST_HEAD(, BlockDriver) bdrv_drivers = QLIST_HEAD_INITIALIZER(bdrv_drivers); @@ -134,6 +137,7 @@ size_t bdrv_opt_mem_align(BlockDriverState *bs) /* page size or 4k (hdd sector size) should be on the safe side */ return MAX(4096, qemu_real_host_page_size); } + IO_CODE(); return bs->bl.opt_mem_alignment; } @@ -144,6 +148,7 @@ size_t bdrv_min_mem_align(BlockDriverState *bs) /* page size or 4k (hdd sector size) should be on the safe side */ return MAX(4096, qemu_real_host_page_size); } + 
IO_CODE(); return bs->bl.min_mem_alignment; } @@ -269,12 +274,15 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, * image is inactivated. */ bool bdrv_is_read_only(BlockDriverState *bs) { + IO_CODE(); return !(bs->open_flags & BDRV_O_RDWR); } int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, bool ignore_allow_rdw, Error **errp) { + IO_CODE(); + /* Do not set read_only if copy_on_read is enabled */ if (bs->copy_on_read && read_only) { error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", @@ -308,6 +316,7 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, Error **errp) { int ret = 0; + IO_CODE(); if (!(bs->open_flags & BDRV_O_RDWR)) { return 0; @@ -384,12 +393,14 @@ static char *bdrv_make_absolute_filename(BlockDriverState *relative_to, char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp) { + GLOBAL_STATE_CODE(); return bdrv_make_absolute_filename(bs, bs->backing_file, errp); } void bdrv_register(BlockDriver *bdrv) { assert(bdrv->format_name); + GLOBAL_STATE_CODE(); QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); } @@ -398,6 +409,8 @@ BlockDriverState *bdrv_new(void) BlockDriverState *bs; int i; + GLOBAL_STATE_CODE(); + bs = g_new0(BlockDriverState, 1); QLIST_INIT(&bs->dirty_bitmaps); for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { @@ -425,6 +438,7 @@ BlockDriverState *bdrv_new(void) static BlockDriver *bdrv_do_find_format(const char *format_name) { BlockDriver *drv1; + GLOBAL_STATE_CODE(); QLIST_FOREACH(drv1, &bdrv_drivers, list) { if (!strcmp(drv1->format_name, format_name)) { @@ -440,6 +454,8 @@ BlockDriver *bdrv_find_format(const char *format_name) BlockDriver *drv1; int i; + GLOBAL_STATE_CODE(); + drv1 = bdrv_do_find_format(format_name); if (drv1) { return drv1; @@ -489,6 +505,7 @@ static int bdrv_format_is_whitelisted(const char *format_name, bool read_only) int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) { + GLOBAL_STATE_CODE(); return 
bdrv_format_is_whitelisted(drv->format_name, read_only); } @@ -512,6 +529,7 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) CreateCo *cco = opaque; assert(cco->drv); + GLOBAL_STATE_CODE(); ret = cco->drv->bdrv_co_create_opts(cco->drv, cco->filename, cco->opts, &local_err); @@ -524,6 +542,8 @@ int bdrv_create(BlockDriver *drv, const char* filename, { int ret; + GLOBAL_STATE_CODE(); + Coroutine *co; CreateCo cco = { .drv = drv, @@ -578,6 +598,8 @@ static int64_t create_file_fallback_truncate(BlockBackend *blk, int64_t size; int ret; + GLOBAL_STATE_CODE(); + ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, &local_err); if (ret < 0 && ret != -ENOTSUP) { @@ -616,6 +638,8 @@ static int create_file_fallback_zero_first_sector(BlockBackend *blk, int64_t bytes_to_clear; int ret; + GLOBAL_STATE_CODE(); + bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); if (bytes_to_clear) { ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); @@ -647,6 +671,8 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, Error *local_err = NULL; int ret; + GLOBAL_STATE_CODE(); + size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, @@ -699,6 +725,8 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) QDict *qdict; int ret; + GLOBAL_STATE_CODE(); + drv = bdrv_find_protocol(filename, true, errp); if (drv == NULL) { return -ENOENT; @@ -743,6 +771,7 @@ int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp) Error *local_err = NULL; int ret; + IO_CODE(); assert(bs != NULL); if (!bs->drv) { @@ -768,6 +797,7 @@ void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs) { Error *local_err = NULL; int ret; + IO_CODE(); if (!bs) { return; @@ -796,6 +826,7 @@ int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) { BlockDriver *drv = bs->drv; BlockDriverState *filtered = 
bdrv_filter_bs(bs); + GLOBAL_STATE_CODE(); if (drv && drv->bdrv_probe_blocksizes) { return drv->bdrv_probe_blocksizes(bs, bsz); @@ -816,6 +847,7 @@ int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) { BlockDriver *drv = bs->drv; BlockDriverState *filtered = bdrv_filter_bs(bs); + GLOBAL_STATE_CODE(); if (drv && drv->bdrv_probe_geometry) { return drv->bdrv_probe_geometry(bs, geo); @@ -870,6 +902,7 @@ static BlockDriver *find_hdev_driver(const char *filename) { int score_max = 0, score; BlockDriver *drv = NULL, *d; + GLOBAL_STATE_CODE(); QLIST_FOREACH(d, &bdrv_drivers, list) { if (d->bdrv_probe_device) { @@ -887,6 +920,7 @@ static BlockDriver *find_hdev_driver(const char *filename) static BlockDriver *bdrv_do_find_protocol(const char *protocol) { BlockDriver *drv1; + GLOBAL_STATE_CODE(); QLIST_FOREACH(drv1, &bdrv_drivers, list) { if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) { @@ -907,6 +941,7 @@ BlockDriver *bdrv_find_protocol(const char *filename, const char *p; int i; + GLOBAL_STATE_CODE(); /* TODO Drivers without bdrv_file_open must be specified explicitly */ /* @@ -972,6 +1007,7 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, { int score_max = 0, score; BlockDriver *drv = NULL, *d; + IO_CODE(); QLIST_FOREACH(d, &bdrv_drivers, list) { if (d->bdrv_probe) { @@ -993,6 +1029,8 @@ static int find_image_format(BlockBackend *file, const char *filename, uint8_t buf[BLOCK_PROBE_BUF_SIZE]; int ret = 0; + GLOBAL_STATE_CODE(); + /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) { *pdrv = &bdrv_raw; @@ -1024,6 +1062,7 @@ static int find_image_format(BlockBackend *file, const char *filename, int refresh_total_sectors(BlockDriverState *bs, int64_t hint) { BlockDriver *drv = bs->drv; + IO_CODE(); if (!drv) { return -ENOMEDIUM; @@ -1058,6 +1097,7 @@ int refresh_total_sectors(BlockDriverState *bs, int64_t hint) static void 
bdrv_join_options(BlockDriverState *bs, QDict *options, QDict *old_options) { + GLOBAL_STATE_CODE(); if (bs->drv && bs->drv->bdrv_join_options) { bs->drv->bdrv_join_options(options, old_options); } else { @@ -1074,6 +1114,7 @@ static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, BlockdevDetectZeroesOptions detect_zeroes = qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value, BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err); + GLOBAL_STATE_CODE(); g_free(value); if (local_err) { error_propagate(errp, local_err); @@ -1189,6 +1230,7 @@ static void bdrv_child_cb_drained_end(BdrvChild *child, static int bdrv_child_cb_inactivate(BdrvChild *child) { BlockDriverState *bs = child->opaque; + GLOBAL_STATE_CODE(); assert(bs->open_flags & BDRV_O_INACTIVE); return 0; } @@ -1215,6 +1257,7 @@ static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx, static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options) { + GLOBAL_STATE_CODE(); *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; /* For temporary files, unconditional cache=unsafe is fine */ @@ -1235,6 +1278,7 @@ static void bdrv_backing_attach(BdrvChild *c) BlockDriverState *parent = c->opaque; BlockDriverState *backing_hd = c->bs; + GLOBAL_STATE_CODE(); assert(!parent->backing_blocker); error_setg(&parent->backing_blocker, "node is used as backing hd of '%s'", @@ -1273,6 +1317,7 @@ static void bdrv_backing_detach(BdrvChild *c) { BlockDriverState *parent = c->opaque; + GLOBAL_STATE_CODE(); assert(parent->backing_blocker); bdrv_op_unblock_all(c->bs, parent->backing_blocker); error_free(parent->backing_blocker); @@ -1285,6 +1330,7 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, BlockDriverState *parent = c->opaque; bool read_only = bdrv_is_read_only(parent); int ret; + GLOBAL_STATE_CODE(); if (read_only) { ret = bdrv_reopen_set_read_only(parent, false, errp); @@ -1316,6 +1362,7 
@@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, int parent_flags, QDict *parent_options) { int flags = parent_flags; + GLOBAL_STATE_CODE(); /* * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL. @@ -1391,6 +1438,7 @@ static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; + assert_bdrv_graph_writable(bs); QLIST_INSERT_HEAD(&bs->children, child, next); if (child->role & BDRV_CHILD_COW) { @@ -1410,6 +1458,7 @@ static void bdrv_child_cb_detach(BdrvChild *child) bdrv_unapply_subtree_drain(child, bs); + assert_bdrv_graph_writable(bs); QLIST_REMOVE(child, next); } @@ -1425,6 +1474,7 @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c) { BlockDriverState *bs = c->opaque; + IO_CODE(); return bdrv_get_aio_context(bs); } @@ -1447,12 +1497,14 @@ const BdrvChildClass child_of_bds = { AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) { + GLOBAL_STATE_CODE(); return c->klass->get_parent_aio_context(c); } static int bdrv_open_flags(BlockDriverState *bs, int flags) { int open_flags = flags; + GLOBAL_STATE_CODE(); /* * Clear flags that are internal to the block layer before opening the @@ -1465,6 +1517,8 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags) static void update_flags_from_options(int *flags, QemuOpts *opts) { + GLOBAL_STATE_CODE(); + *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY); if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) { @@ -1486,6 +1540,7 @@ static void update_flags_from_options(int *flags, QemuOpts *opts) static void update_options_from_flags(QDict *options, int flags) { + GLOBAL_STATE_CODE(); if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) { qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE); } @@ -1507,6 +1562,7 @@ static void bdrv_assign_node_name(BlockDriverState *bs, Error **errp) { char *gen_node_name = 
NULL; + GLOBAL_STATE_CODE(); if (!node_name) { node_name = gen_node_name = id_generate(ID_BLOCK); @@ -1551,6 +1607,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, { Error *local_err = NULL; int i, ret; + GLOBAL_STATE_CODE(); bdrv_assign_node_name(bs, node_name, &local_err); if (local_err) { @@ -1631,6 +1688,8 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, BlockDriverState *bs; int ret; + GLOBAL_STATE_CODE(); + bs = bdrv_new(); bs->open_flags = flags; bs->options = options ?: qdict_new(); @@ -1656,6 +1715,7 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, int flags, Error **errp) { + GLOBAL_STATE_CODE(); return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp); } @@ -1750,6 +1810,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, assert(bs->file == NULL); assert(options != NULL && bs->options != options); + GLOBAL_STATE_CODE(); opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); if (!qemu_opts_absorb_qdict(opts, options, errp)) { @@ -1875,6 +1936,7 @@ static QDict *parse_json_filename(const char *filename, Error **errp) QObject *options_obj; QDict *options; int ret; + GLOBAL_STATE_CODE(); ret = strstart(filename, "json:", &filename); assert(ret); @@ -1902,6 +1964,7 @@ static void parse_json_protocol(QDict *options, const char **pfilename, { QDict *json_options; Error *local_err = NULL; + GLOBAL_STATE_CODE(); /* Parse json: pseudo-protocol */ if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) { @@ -1936,6 +1999,8 @@ static int bdrv_fill_options(QDict **options, const char *filename, BlockDriver *drv = NULL; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + /* * Caution: while qdict_get_try_str() is fine, getting non-string * types would require more care. 
When @options come from @@ -2057,11 +2122,13 @@ static bool bdrv_is_writable_after_reopen(BlockDriverState *bs, */ bool bdrv_is_writable(BlockDriverState *bs) { + IO_CODE(); return bdrv_is_writable_after_reopen(bs, NULL); } static char *bdrv_child_user_desc(BdrvChild *c) { + GLOBAL_STATE_CODE(); return c->klass->get_parent_desc(c); } @@ -2078,6 +2145,7 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) assert(a->bs); assert(a->bs == b->bs); + GLOBAL_STATE_CODE(); if ((b->perm & a->shared_perm) == b->perm) { return true; @@ -2101,6 +2169,7 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) { BdrvChild *a, *b; + GLOBAL_STATE_CODE(); /* * During the loop we'll look at each pair twice. That's correct because @@ -2129,6 +2198,7 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, uint64_t *nperm, uint64_t *nshared) { assert(bs->drv && bs->drv->bdrv_child_perm); + GLOBAL_STATE_CODE(); bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, parent_perm, parent_shared, nperm, nshared); @@ -2155,6 +2225,8 @@ static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found, BdrvChild *child; g_autoptr(GHashTable) local_found = NULL; + GLOBAL_STATE_CODE(); + if (!found) { assert(!list); found = local_found = g_hash_table_new(NULL, NULL); @@ -2182,6 +2254,8 @@ static void bdrv_child_set_perm_abort(void *opaque) { BdrvChildSetPermState *s = opaque; + GLOBAL_STATE_CODE(); + s->child->perm = s->old_perm; s->child->shared_perm = s->old_shared_perm; } @@ -2195,6 +2269,7 @@ static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, Transaction *tran) { BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1); + GLOBAL_STATE_CODE(); *s = (BdrvChildSetPermState) { .child = c, @@ -2212,6 +2287,7 @@ static void bdrv_drv_set_perm_commit(void *opaque) { BlockDriverState *bs = opaque; uint64_t cumulative_perms, 
cumulative_shared_perms; + GLOBAL_STATE_CODE(); if (bs->drv->bdrv_set_perm) { bdrv_get_cumulative_perm(bs, &cumulative_perms, @@ -2223,6 +2299,7 @@ static void bdrv_drv_set_perm_commit(void *opaque) static void bdrv_drv_set_perm_abort(void *opaque) { BlockDriverState *bs = opaque; + GLOBAL_STATE_CODE(); if (bs->drv->bdrv_abort_perm_update) { bs->drv->bdrv_abort_perm_update(bs); @@ -2238,6 +2315,7 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm, Transaction *tran, Error **errp) { + GLOBAL_STATE_CODE(); if (!bs->drv) { return 0; } @@ -2266,6 +2344,7 @@ typedef struct BdrvReplaceChildState { static void bdrv_replace_child_commit(void *opaque) { BdrvReplaceChildState *s = opaque; + GLOBAL_STATE_CODE(); if (s->free_empty_child && !s->child->bs) { bdrv_child_free(s->child); @@ -2278,6 +2357,7 @@ static void bdrv_replace_child_abort(void *opaque) BdrvReplaceChildState *s = opaque; BlockDriverState *new_bs = s->child->bs; + GLOBAL_STATE_CODE(); /* * old_bs reference is transparently moved from @s to s->child. 
* @@ -2374,6 +2454,7 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, BdrvChild *c; int ret; uint64_t cumulative_perms, cumulative_shared_perms; + GLOBAL_STATE_CODE(); bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms); @@ -2442,6 +2523,7 @@ static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, { int ret; BlockDriverState *bs; + GLOBAL_STATE_CODE(); for ( ; list; list = list->next) { bs = list->data; @@ -2466,6 +2548,8 @@ void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, uint64_t cumulative_perms = 0; uint64_t cumulative_shared_perms = BLK_PERM_ALL; + GLOBAL_STATE_CODE(); + QLIST_FOREACH(c, &bs->parents, next_parent) { cumulative_perms |= c->perm; cumulative_shared_perms &= c->shared_perm; @@ -2509,6 +2593,7 @@ static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) int ret; Transaction *tran = tran_new(); g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); + GLOBAL_STATE_CODE(); ret = bdrv_list_refresh_perms(list, NULL, tran, errp); tran_finalize(tran, ret); @@ -2523,6 +2608,8 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, Transaction *tran = tran_new(); int ret; + GLOBAL_STATE_CODE(); + bdrv_child_set_perm(c, perm, shared, tran); ret = bdrv_refresh_perms(c->bs, &local_err); @@ -2553,6 +2640,8 @@ int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) uint64_t parent_perms, parent_shared; uint64_t perms, shared; + GLOBAL_STATE_CODE(); + bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared); bdrv_child_perm(bs, c->bs, c, c->role, NULL, parent_perms, parent_shared, &perms, &shared); @@ -2571,6 +2660,7 @@ static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { + GLOBAL_STATE_CODE(); *nperm = perm & DEFAULT_PERM_PASSTHROUGH; *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; } @@ -2582,6 +2672,7 
@@ static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c, uint64_t *nperm, uint64_t *nshared) { assert(role & BDRV_CHILD_COW); + GLOBAL_STATE_CODE(); /* * We want consistent read from backing files if the parent needs it. @@ -2618,6 +2709,7 @@ static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c, { int flags; + GLOBAL_STATE_CODE(); assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)); flags = bdrv_reopen_get_flags(reopen_queue, bs); @@ -2694,6 +2786,7 @@ void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { + GLOBAL_STATE_CODE(); if (role & BDRV_CHILD_FILTERED) { assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | BDRV_CHILD_COW))); @@ -2752,6 +2845,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, assert(!child->frozen); assert(old_bs != new_bs); + GLOBAL_STATE_CODE(); if (old_bs && new_bs) { assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); @@ -2776,6 +2870,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, if (child->klass->detach) { child->klass->detach(child); } + assert_bdrv_graph_writable(old_bs); QLIST_REMOVE(child, next_parent); } @@ -2785,6 +2880,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, } if (new_bs) { + assert_bdrv_graph_writable(new_bs); QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); /* @@ -2827,6 +2923,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp, static void bdrv_child_free(BdrvChild *child) { assert(!child->bs); + GLOBAL_STATE_CODE(); assert(!child->next.le_prev); /* not in children list */ g_free(child->name); @@ -2845,6 +2942,7 @@ static void bdrv_attach_child_common_abort(void *opaque) BdrvChild *child = *s->child; BlockDriverState *bs = child->bs; + GLOBAL_STATE_CODE(); /* * Pass free_empty_child=false, because we still need the child * for the AioContext operations on the parent below; those @@ -2907,6 +3005,7 @@ static int 
bdrv_attach_child_common(BlockDriverState *child_bs, assert(child); assert(*child == NULL); assert(child_class->get_parent_desc); + GLOBAL_STATE_CODE(); new_child = g_new(BdrvChild, 1); *new_child = (BdrvChild) { @@ -2987,6 +3086,7 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, uint64_t perm, shared_perm; assert(parent_bs->drv); + GLOBAL_STATE_CODE(); if (bdrv_recurse_has_child(child_bs, parent_bs)) { error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle", @@ -3012,6 +3112,7 @@ static void bdrv_detach_child(BdrvChild **childp) { BlockDriverState *old_bs = (*childp)->bs; + GLOBAL_STATE_CODE(); bdrv_replace_child_noperm(childp, NULL, true); if (old_bs) { @@ -3051,6 +3152,8 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, BdrvChild *child = NULL; Transaction *tran = tran_new(); + GLOBAL_STATE_CODE(); + ret = bdrv_attach_child_common(child_bs, child_name, child_class, child_role, perm, shared_perm, opaque, &child, tran, errp); @@ -3091,6 +3194,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BdrvChild *child = NULL; Transaction *tran = tran_new(); + GLOBAL_STATE_CODE(); + ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class, child_role, &child, tran, errp); if (ret < 0) { @@ -3117,6 +3222,8 @@ void bdrv_root_unref_child(BdrvChild *child) { BlockDriverState *child_bs; + GLOBAL_STATE_CODE(); + child_bs = child->bs; bdrv_detach_child(&child); bdrv_unref(child_bs); @@ -3191,6 +3298,7 @@ static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, /* Callers must ensure that child->frozen is false. 
*/ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) { + GLOBAL_STATE_CODE(); if (child == NULL) { return; } @@ -3203,6 +3311,7 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) { BdrvChild *c; + GLOBAL_STATE_CODE(); QLIST_FOREACH(c, &bs->parents, next_parent) { if (c->klass->change_media) { c->klass->change_media(c, load); @@ -3253,6 +3362,8 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file; BdrvChildRole role; + GLOBAL_STATE_CODE(); + if (!parent_bs->drv) { /* * Node without drv is an object without a class :/. TODO: finally fix @@ -3332,6 +3443,7 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs, BlockDriverState *backing_hd, Transaction *tran, Error **errp) { + GLOBAL_STATE_CODE(); return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); } @@ -3341,6 +3453,7 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, int ret; Transaction *tran = tran_new(); + GLOBAL_STATE_CODE(); bdrv_drained_begin(bs); ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); @@ -3380,6 +3493,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, QDict *tmp_parent_options = NULL; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + if (bs->backing != NULL) { goto free_exit; } @@ -3539,6 +3654,8 @@ BdrvChild *bdrv_open_child(const char *filename, { BlockDriverState *bs; + GLOBAL_STATE_CODE(); + bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, child_role, allow_none, errp); if (bs == NULL) { @@ -3561,6 +3678,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) const char *reference = NULL; Visitor *v = NULL; + GLOBAL_STATE_CODE(); + if (ref->type == QTYPE_QSTRING) { reference = ref->u.reference; } else { @@ -3603,6 +3722,8 @@ static BlockDriverState 
*bdrv_append_temp_snapshot(BlockDriverState *bs, BlockDriverState *bs_snapshot = NULL; int ret; + GLOBAL_STATE_CODE(); + /* if snapshot, we create a temporary backing file and open it instead of opening 'filename' directly */ @@ -3690,6 +3811,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, assert(!child_class || !flags); assert(!child_class == !parent); + GLOBAL_STATE_CODE(); if (reference) { bool options_non_empty = options ? qdict_size(options) : false; @@ -3958,6 +4080,8 @@ close_and_fail: BlockDriverState *bdrv_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp) { + GLOBAL_STATE_CODE(); + return bdrv_open_inherit(filename, reference, options, flags, NULL, NULL, 0, errp); } @@ -4074,6 +4198,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, * important to avoid graph changes between the recursive queuing here and * bdrv_reopen_multiple(). */ assert(bs->quiesce_counter > 0); + GLOBAL_STATE_CODE(); if (bs_queue == NULL) { bs_queue = g_new0(BlockReopenQueue, 1); @@ -4212,12 +4337,15 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, bool keep_old_opts) { + GLOBAL_STATE_CODE(); + return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false, NULL, 0, keep_old_opts); } void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) { + GLOBAL_STATE_CODE(); if (bs_queue) { BlockReopenQueueEntry *bs_entry, *next; QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { @@ -4259,6 +4387,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) assert(qemu_get_current_aio_context() == qemu_get_aio_context()); assert(bs_queue != NULL); + GLOBAL_STATE_CODE(); QTAILQ_FOREACH(bs_entry, bs_queue, entry) { ctx = bdrv_get_aio_context(bs_entry->state.bs); @@ -4365,6 +4494,8 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, BlockReopenQueue *queue; int ret; + GLOBAL_STATE_CODE(); + 
bdrv_subtree_drained_begin(bs); if (ctx != qemu_get_aio_context()) { aio_context_release(ctx); @@ -4386,6 +4517,8 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, { QDict *opts = qdict_new(); + GLOBAL_STATE_CODE(); + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); return bdrv_reopen(bs, opts, true, errp); @@ -4420,6 +4553,8 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, QObject *value; const char *str; + GLOBAL_STATE_CODE(); + value = qdict_get(reopen_state->options, child_name); if (value == NULL) { return 0; @@ -4518,6 +4653,7 @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, assert(reopen_state != NULL); assert(reopen_state->bs->drv != NULL); + GLOBAL_STATE_CODE(); drv = reopen_state->bs->drv; /* This function and each driver's bdrv_reopen_prepare() remove @@ -4728,6 +4864,7 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state) bs = reopen_state->bs; drv = bs->drv; assert(drv != NULL); + GLOBAL_STATE_CODE(); /* If there are any driver level actions to take */ if (drv->bdrv_reopen_commit) { @@ -4769,6 +4906,7 @@ static void bdrv_reopen_abort(BDRVReopenState *reopen_state) assert(reopen_state != NULL); drv = reopen_state->bs->drv; assert(drv != NULL); + GLOBAL_STATE_CODE(); if (drv->bdrv_reopen_abort) { drv->bdrv_reopen_abort(reopen_state); @@ -4781,6 +4919,7 @@ static void bdrv_close(BlockDriverState *bs) BdrvAioNotifier *ban, *ban_next; BdrvChild *child, *next; + GLOBAL_STATE_CODE(); assert(!bs->refcnt); bdrv_drained_begin(bs); /* complete I/O */ @@ -4840,6 +4979,7 @@ static void bdrv_close(BlockDriverState *bs) void bdrv_close_all(void) { assert(job_next(NULL) == NULL); + GLOBAL_STATE_CODE(); /* Drop references from requests still in flight, such as canceled block * jobs whose AIO context has not been polled yet */ @@ -4958,7 +5098,7 @@ static void bdrv_remove_filter_or_cow_child_abort(void *opaque) static void bdrv_remove_filter_or_cow_child_commit(void *opaque) { 
BdrvRemoveFilterOrCowChild *s = opaque; - + GLOBAL_STATE_CODE(); bdrv_child_free(s->child); } @@ -5041,6 +5181,7 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, BdrvChild *c, *next; assert(to != NULL); + GLOBAL_STATE_CODE(); QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); @@ -5091,6 +5232,7 @@ static int bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to_cow_parent = NULL; int ret; + GLOBAL_STATE_CODE(); assert(to != NULL); if (detach_subchain) { @@ -5154,11 +5296,15 @@ out: int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp) { + GLOBAL_STATE_CODE(); + return bdrv_replace_node_common(from, to, true, false, errp); } int bdrv_drop_filter(BlockDriverState *bs, Error **errp) { + GLOBAL_STATE_CODE(); + return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true, errp); } @@ -5181,6 +5327,8 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, int ret; Transaction *tran = tran_new(); + GLOBAL_STATE_CODE(); + assert(!bs_new->backing); ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing", @@ -5214,6 +5362,8 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, g_autoptr(GSList) refresh_list = NULL; BlockDriverState *old_bs = child->bs; + GLOBAL_STATE_CODE(); + bdrv_ref(old_bs); bdrv_drained_begin(old_bs); bdrv_drained_begin(new_bs); @@ -5241,6 +5391,7 @@ static void bdrv_delete(BlockDriverState *bs) { assert(bdrv_op_blocker_is_empty(bs)); assert(!bs->refcnt); + GLOBAL_STATE_CODE(); /* remove from list, if necessary */ if (bs->node_name[0] != '\0') { @@ -5285,6 +5436,8 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, node_name = qdict_get_try_str(options, "node-name"); + GLOBAL_STATE_CODE(); + new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, errp); options = NULL; /* bdrv_new_open_driver() eats options */ @@ -5320,6 +5473,7 @@ fail: int coroutine_fn 
bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { + IO_CODE(); if (bs->drv == NULL) { return -ENOMEDIUM; } @@ -5345,6 +5499,8 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, BlockDriver *drv = bs->drv; int ret; + GLOBAL_STATE_CODE(); + if (!drv) { return -ENOMEDIUM; } @@ -5386,6 +5542,9 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, BlockDriverState *bdrv_find_overlay(BlockDriverState *active, BlockDriverState *bs) { + + GLOBAL_STATE_CODE(); + bs = bdrv_skip_filters(bs); active = bdrv_skip_filters(active); @@ -5403,6 +5562,8 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active, /* Given a BDS, searches for the base layer. */ BlockDriverState *bdrv_find_base(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); + return bdrv_find_overlay(bs, NULL); } @@ -5417,6 +5578,8 @@ bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, BlockDriverState *i; BdrvChild *child; + GLOBAL_STATE_CODE(); + for (i = bs; i != base; i = child_bs(child)) { child = bdrv_filter_or_cow_child(i); @@ -5443,6 +5606,8 @@ int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, BlockDriverState *i; BdrvChild *child; + GLOBAL_STATE_CODE(); + if (bdrv_is_backing_chain_frozen(bs, base, errp)) { return -EPERM; } @@ -5477,6 +5642,8 @@ void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base) BlockDriverState *i; BdrvChild *child; + GLOBAL_STATE_CODE(); + for (i = bs; i != base; i = child_bs(child)) { child = bdrv_filter_or_cow_child(i); if (child) { @@ -5526,6 +5693,8 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, g_autoptr(GSList) updated_children = NULL; GSList *p; + GLOBAL_STATE_CODE(); + bdrv_ref(top); bdrv_subtree_drained_begin(top); @@ -5637,6 +5806,8 @@ static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs) int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) { BlockDriver *drv = 
bs->drv; + IO_CODE(); + if (!drv) { return -ENOMEDIUM; } @@ -5686,6 +5857,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, BlockDriverState *in_bs, Error **errp) { + IO_CODE(); if (!drv->bdrv_measure) { error_setg(errp, "Block driver '%s' does not support size measurement", drv->format_name); @@ -5701,6 +5873,7 @@ BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, int64_t bdrv_nb_sectors(BlockDriverState *bs) { BlockDriver *drv = bs->drv; + IO_CODE(); if (!drv) return -ENOMEDIUM; @@ -5721,6 +5894,7 @@ int64_t bdrv_nb_sectors(BlockDriverState *bs) int64_t bdrv_getlength(BlockDriverState *bs) { int64_t ret = bdrv_nb_sectors(bs); + IO_CODE(); if (ret < 0) { return ret; @@ -5735,12 +5909,14 @@ int64_t bdrv_getlength(BlockDriverState *bs) void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) { int64_t nb_sectors = bdrv_nb_sectors(bs); + IO_CODE(); *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; } bool bdrv_is_sg(BlockDriverState *bs) { + IO_CODE(); return bs->sg; } @@ -5750,6 +5926,7 @@ bool bdrv_is_sg(BlockDriverState *bs) bool bdrv_supports_compressed_writes(BlockDriverState *bs) { BlockDriverState *filtered; + IO_CODE(); if (!bs->drv || !block_driver_can_compress(bs->drv)) { return false; @@ -5769,6 +5946,7 @@ bool bdrv_supports_compressed_writes(BlockDriverState *bs) const char *bdrv_get_format_name(BlockDriverState *bs) { + IO_CODE(); return bs->drv ? 
bs->drv->format_name : NULL; } @@ -5785,6 +5963,8 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name), int i; const char **formats = NULL; + GLOBAL_STATE_CODE(); + QLIST_FOREACH(drv, &bdrv_drivers, list) { if (drv->format_name) { bool found = false; @@ -5843,6 +6023,7 @@ BlockDriverState *bdrv_find_node(const char *node_name) BlockDriverState *bs; assert(node_name); + GLOBAL_STATE_CODE(); QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { if (!strcmp(node_name, bs->node_name)) { @@ -5859,6 +6040,8 @@ BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, BlockDeviceInfoList *list; BlockDriverState *bs; + GLOBAL_STATE_CODE(); + list = NULL; QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp); @@ -5934,6 +6117,7 @@ static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent, { BlockPermission qapi_perm; XDbgBlockGraphEdge *edge; + GLOBAL_STATE_CODE(); edge = g_new0(XDbgBlockGraphEdge, 1); @@ -5964,6 +6148,8 @@ XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp) BdrvChild *child; XDbgBlockGraphConstructor *gr = xdbg_graph_new(); + GLOBAL_STATE_CODE(); + for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { char *allocated_name = NULL; const char *name = blk_name(blk); @@ -6007,6 +6193,8 @@ BlockDriverState *bdrv_lookup_bs(const char *device, BlockBackend *blk; BlockDriverState *bs; + GLOBAL_STATE_CODE(); + if (device) { blk = blk_by_name(device); @@ -6038,6 +6226,9 @@ BlockDriverState *bdrv_lookup_bs(const char *device, * return false. If either argument is NULL, return false. 
*/ bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) { + + GLOBAL_STATE_CODE(); + while (top && top != base) { top = bdrv_filter_or_cow_bs(top); } @@ -6047,6 +6238,7 @@ bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) BlockDriverState *bdrv_next_node(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); if (!bs) { return QTAILQ_FIRST(&graph_bdrv_states); } @@ -6055,6 +6247,7 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs) BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); if (!bs) { return QTAILQ_FIRST(&all_bdrv_states); } @@ -6063,6 +6256,7 @@ BlockDriverState *bdrv_next_all_states(BlockDriverState *bs) const char *bdrv_get_node_name(const BlockDriverState *bs) { + IO_CODE(); return bs->node_name; } @@ -6070,6 +6264,7 @@ const char *bdrv_get_parent_name(const BlockDriverState *bs) { BdrvChild *c; const char *name; + IO_CODE(); /* If multiple parents have a name, just pick the first one. */ QLIST_FOREACH(c, &bs->parents, next_parent) { @@ -6087,6 +6282,7 @@ const char *bdrv_get_parent_name(const BlockDriverState *bs) /* TODO check what callers really want: bs->node_name or blk_name() */ const char *bdrv_get_device_name(const BlockDriverState *bs) { + IO_CODE(); return bdrv_get_parent_name(bs) ?: ""; } @@ -6096,22 +6292,26 @@ const char *bdrv_get_device_name(const BlockDriverState *bs) * absent, then this returns an empty (non-null) string. 
*/ const char *bdrv_get_device_or_node_name(const BlockDriverState *bs) { + IO_CODE(); return bdrv_get_parent_name(bs) ?: bs->node_name; } int bdrv_get_flags(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); return bs->open_flags; } int bdrv_has_zero_init_1(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); return 1; } int bdrv_has_zero_init(BlockDriverState *bs) { BlockDriverState *filtered; + GLOBAL_STATE_CODE(); if (!bs->drv) { return 0; @@ -6137,6 +6337,7 @@ int bdrv_has_zero_init(BlockDriverState *bs) bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) { + IO_CODE(); if (!(bs->open_flags & BDRV_O_UNMAP)) { return false; } @@ -6147,6 +6348,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) void bdrv_get_backing_filename(BlockDriverState *bs, char *filename, int filename_size) { + IO_CODE(); pstrcpy(filename, filename_size, bs->backing_file); } @@ -6154,6 +6356,7 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { int ret; BlockDriver *drv = bs->drv; + IO_CODE(); /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ if (!drv) { return -ENOMEDIUM; @@ -6182,6 +6385,7 @@ ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, Error **errp) { BlockDriver *drv = bs->drv; + IO_CODE(); if (drv && drv->bdrv_get_specific_info) { return drv->bdrv_get_specific_info(bs, errp); } @@ -6191,6 +6395,7 @@ ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs) { BlockDriver *drv = bs->drv; + IO_CODE(); if (!drv || !drv->bdrv_get_specific_stats) { return NULL; } @@ -6199,6 +6404,7 @@ BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs) void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event) { + IO_CODE(); if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { return; } @@ -6208,6 +6414,7 @@ void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event) static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs) { + 
GLOBAL_STATE_CODE(); while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { bs = bdrv_primary_bs(bs); } @@ -6223,6 +6430,7 @@ static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs) int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, const char *tag) { + GLOBAL_STATE_CODE(); bs = bdrv_find_debug_node(bs); if (bs) { return bs->drv->bdrv_debug_breakpoint(bs, event, tag); @@ -6233,6 +6441,7 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) { + GLOBAL_STATE_CODE(); bs = bdrv_find_debug_node(bs); if (bs) { return bs->drv->bdrv_debug_remove_breakpoint(bs, tag); @@ -6243,6 +6452,7 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) int bdrv_debug_resume(BlockDriverState *bs, const char *tag) { + GLOBAL_STATE_CODE(); while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { bs = bdrv_primary_bs(bs); } @@ -6256,6 +6466,7 @@ int bdrv_debug_resume(BlockDriverState *bs, const char *tag) bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) { + GLOBAL_STATE_CODE(); while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { bs = bdrv_primary_bs(bs); } @@ -6283,6 +6494,8 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, BlockDriverState *retval = NULL; BlockDriverState *bs_below; + GLOBAL_STATE_CODE(); + if (!bs || !bs->drv || !backing_file) { return NULL; } @@ -6393,19 +6606,21 @@ void bdrv_init_with_whitelist(void) bdrv_init(); } -int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) +int bdrv_activate(BlockDriverState *bs, Error **errp) { BdrvChild *child, *parent; Error *local_err = NULL; int ret; BdrvDirtyBitmap *bm; + GLOBAL_STATE_CODE(); + if (!bs->drv) { return -ENOMEDIUM; } QLIST_FOREACH(child, &bs->children, next) { - bdrv_co_invalidate_cache(child->bs, &local_err); + bdrv_activate(child->bs, &local_err); if (local_err) { error_propagate(errp, local_err); return -EINVAL; 
@@ -6418,7 +6633,7 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) * Note that the required permissions of inactive images are always a * subset of the permissions required after activating the image. This * allows us to just get the permissions upfront without restricting - * drv->bdrv_invalidate_cache(). + * bdrv_co_invalidate_cache(). * * It also means that in error cases, we don't have to try and revert to * the old permissions (which is an operation that could fail, too). We can @@ -6433,13 +6648,10 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) return ret; } - if (bs->drv->bdrv_co_invalidate_cache) { - bs->drv->bdrv_co_invalidate_cache(bs, &local_err); - if (local_err) { - bs->open_flags |= BDRV_O_INACTIVE; - error_propagate(errp, local_err); - return -EINVAL; - } + ret = bdrv_invalidate_cache(bs, errp); + if (ret < 0) { + bs->open_flags |= BDRV_O_INACTIVE; + return ret; } FOR_EACH_DIRTY_BITMAP(bs, bm) { @@ -6468,17 +6680,37 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) return 0; } -void bdrv_invalidate_cache_all(Error **errp) +int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) +{ + Error *local_err = NULL; + IO_CODE(); + + assert(!(bs->open_flags & BDRV_O_INACTIVE)); + + if (bs->drv->bdrv_co_invalidate_cache) { + bs->drv->bdrv_co_invalidate_cache(bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + } + + return 0; +} + +void bdrv_activate_all(Error **errp) { BlockDriverState *bs; BdrvNextIterator it; + GLOBAL_STATE_CODE(); + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); int ret; aio_context_acquire(aio_context); - ret = bdrv_invalidate_cache(bs, errp); + ret = bdrv_activate(bs, errp); aio_context_release(aio_context); if (ret < 0) { bdrv_next_cleanup(&it); @@ -6490,6 +6722,7 @@ void bdrv_invalidate_cache_all(Error **errp) static 
bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active) { BdrvChild *parent; + GLOBAL_STATE_CODE(); QLIST_FOREACH(parent, &bs->parents, next_parent) { if (parent->klass->parent_is_bds) { @@ -6509,6 +6742,8 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) int ret; uint64_t cumulative_perms, cumulative_shared_perms; + GLOBAL_STATE_CODE(); + if (!bs->drv) { return -ENOMEDIUM; } @@ -6572,6 +6807,8 @@ int bdrv_inactivate_all(void) int ret = 0; GSList *aio_ctxs = NULL, *ctx; + GLOBAL_STATE_CODE(); + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); @@ -6615,6 +6852,7 @@ bool bdrv_is_inserted(BlockDriverState *bs) { BlockDriver *drv = bs->drv; BdrvChild *child; + IO_CODE(); if (!drv) { return false; @@ -6636,6 +6874,7 @@ bool bdrv_is_inserted(BlockDriverState *bs) void bdrv_eject(BlockDriverState *bs, bool eject_flag) { BlockDriver *drv = bs->drv; + IO_CODE(); if (drv && drv->bdrv_eject) { drv->bdrv_eject(bs, eject_flag); @@ -6649,7 +6888,7 @@ void bdrv_eject(BlockDriverState *bs, bool eject_flag) void bdrv_lock_medium(BlockDriverState *bs, bool locked) { BlockDriver *drv = bs->drv; - + IO_CODE(); trace_bdrv_lock_medium(bs, locked); if (drv && drv->bdrv_lock_medium) { @@ -6660,6 +6899,7 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked) /* Get a reference to bs */ void bdrv_ref(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); bs->refcnt++; } @@ -6668,6 +6908,7 @@ void bdrv_ref(BlockDriverState *bs) * deleted. 
*/ void bdrv_unref(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); if (!bs) { return; } @@ -6685,6 +6926,7 @@ struct BdrvOpBlocker { bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) { BdrvOpBlocker *blocker; + GLOBAL_STATE_CODE(); assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); if (!QLIST_EMPTY(&bs->op_blockers[op])) { blocker = QLIST_FIRST(&bs->op_blockers[op]); @@ -6699,6 +6941,7 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) { BdrvOpBlocker *blocker; + GLOBAL_STATE_CODE(); assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); blocker = g_new0(BdrvOpBlocker, 1); @@ -6709,6 +6952,7 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) { BdrvOpBlocker *blocker, *next; + GLOBAL_STATE_CODE(); assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { if (blocker->reason == reason) { @@ -6721,6 +6965,7 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) void bdrv_op_block_all(BlockDriverState *bs, Error *reason) { int i; + GLOBAL_STATE_CODE(); for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { bdrv_op_block(bs, i, reason); } @@ -6729,6 +6974,7 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason) void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) { int i; + GLOBAL_STATE_CODE(); for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { bdrv_op_unblock(bs, i, reason); } @@ -6737,7 +6983,7 @@ void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) bool bdrv_op_blocker_is_empty(BlockDriverState *bs) { int i; - + GLOBAL_STATE_CODE(); for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { if (!QLIST_EMPTY(&bs->op_blockers[i])) { return false; @@ -6759,6 +7005,8 @@ void bdrv_img_create(const char *filename, const char *fmt, Error *local_err = NULL; int ret = 0; + GLOBAL_STATE_CODE(); + /* 
Find driver and parse its options */ drv = bdrv_find_format(fmt); if (!drv) { @@ -6936,6 +7184,7 @@ out: AioContext *bdrv_get_aio_context(BlockDriverState *bs) { + IO_CODE(); return bs ? bs->aio_context : qemu_get_aio_context(); } @@ -6944,6 +7193,7 @@ AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs) Coroutine *self = qemu_coroutine_self(); AioContext *old_ctx = qemu_coroutine_get_aio_context(self); AioContext *new_ctx; + IO_CODE(); /* * Increase bs->in_flight to ensure that this operation is completed before @@ -6958,6 +7208,7 @@ AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs) void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) { + IO_CODE(); aio_co_reschedule_self(old_ctx); bdrv_dec_in_flight(bs); } @@ -6991,11 +7242,13 @@ void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) { + IO_CODE(); aio_co_enter(bdrv_get_aio_context(bs), co); } static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) { + GLOBAL_STATE_CODE(); QLIST_REMOVE(ban, list); g_free(ban); } @@ -7005,6 +7258,7 @@ static void bdrv_detach_aio_context(BlockDriverState *bs) BdrvAioNotifier *baf, *baf_tmp; assert(!bs->walking_aio_notifiers); + GLOBAL_STATE_CODE(); bs->walking_aio_notifiers = true; QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) { if (baf->deleted) { @@ -7032,6 +7286,7 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, AioContext *new_context) { BdrvAioNotifier *ban, *ban_tmp; + GLOBAL_STATE_CODE(); if (bs->quiesce_counter) { aio_disable_external(new_context); @@ -7078,6 +7333,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, BdrvChild *child, *parent; g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + GLOBAL_STATE_CODE(); if (old_context == new_context) { return; @@ -7150,6 +7406,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, 
GSList **ignore, Error **errp) { + GLOBAL_STATE_CODE(); if (g_slist_find(*ignore, c)) { return true; } @@ -7175,6 +7432,7 @@ static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, GSList **ignore, Error **errp) { + GLOBAL_STATE_CODE(); if (g_slist_find(*ignore, c)) { return true; } @@ -7193,6 +7451,8 @@ bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, return true; } + GLOBAL_STATE_CODE(); + QLIST_FOREACH(c, &bs->parents, next_parent) { if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) { return false; @@ -7213,6 +7473,8 @@ int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, GSList *ignore; bool ret; + GLOBAL_STATE_CODE(); + ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL; ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp); g_slist_free(ignore); @@ -7231,6 +7493,7 @@ int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, Error **errp) { + GLOBAL_STATE_CODE(); return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp); } @@ -7244,6 +7507,7 @@ void bdrv_add_aio_context_notifier(BlockDriverState *bs, .detach_aio_context = detach_aio_context, .opaque = opaque }; + GLOBAL_STATE_CODE(); QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list); } @@ -7255,6 +7519,7 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs, void *opaque) { BdrvAioNotifier *ban, *ban_next; + GLOBAL_STATE_CODE(); QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { if (ban->attached_aio_context == attached_aio_context && @@ -7279,6 +7544,7 @@ int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, bool force, Error **errp) { + GLOBAL_STATE_CODE(); if (!bs->drv) { error_setg(errp, "Node is ejected"); return -ENOMEDIUM; @@ -7309,6 +7575,8 @@ bool bdrv_recurse_can_replace(BlockDriverState *bs, { BlockDriverState *filtered; + 
GLOBAL_STATE_CODE(); + if (!bs || !bs->drv) { return false; } @@ -7349,6 +7617,8 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, BlockDriverState *to_replace_bs = bdrv_find_node(node_name); AioContext *aio_context; + GLOBAL_STATE_CODE(); + if (!to_replace_bs) { error_setg(errp, "Failed to find node with node-name='%s'", node_name); return NULL; @@ -7478,6 +7748,7 @@ static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs) * would result in exactly bs->backing. */ static bool bdrv_backing_overridden(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); if (bs->backing) { return strcmp(bs->auto_backing_file, bs->backing->bs->filename); @@ -7510,6 +7781,8 @@ void bdrv_refresh_filename(BlockDriverState *bs) bool generate_json_filename; /* Whether our default implementation should fill exact_filename (false) or not (true) */ + GLOBAL_STATE_CODE(); + if (!drv) { return; } @@ -7632,6 +7905,8 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp) BlockDriver *drv = bs->drv; BlockDriverState *child_bs; + GLOBAL_STATE_CODE(); + if (!drv) { error_setg(errp, "Node '%s' is ejected", bs->node_name); return NULL; @@ -7663,7 +7938,7 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp) void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, Error **errp) { - + GLOBAL_STATE_CODE(); if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) { error_setg(errp, "The node %s does not support adding a child", bdrv_get_device_or_node_name(parent_bs)); @@ -7683,6 +7958,7 @@ void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) { BdrvChild *tmp; + GLOBAL_STATE_CODE(); if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) { error_setg(errp, "The node %s does not support removing a child", bdrv_get_device_or_node_name(parent_bs)); @@ -7710,6 +7986,7 @@ int bdrv_make_empty(BdrvChild *c, Error **errp) BlockDriver *drv = c->bs->drv; int ret; + GLOBAL_STATE_CODE(); assert(c->perm & (BLK_PERM_WRITE | 
BLK_PERM_WRITE_UNCHANGED)); if (!drv->bdrv_make_empty) { @@ -7734,6 +8011,8 @@ int bdrv_make_empty(BdrvChild *c, Error **errp) */ BdrvChild *bdrv_cow_child(BlockDriverState *bs) { + IO_CODE(); + if (!bs || !bs->drv) { return NULL; } @@ -7757,6 +8036,7 @@ BdrvChild *bdrv_cow_child(BlockDriverState *bs) BdrvChild *bdrv_filter_child(BlockDriverState *bs) { BdrvChild *c; + IO_CODE(); if (!bs || !bs->drv) { return NULL; @@ -7788,6 +8068,7 @@ BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs) { BdrvChild *cow_child = bdrv_cow_child(bs); BdrvChild *filter_child = bdrv_filter_child(bs); + IO_CODE(); /* Filter nodes cannot have COW backing files */ assert(!(cow_child && filter_child)); @@ -7808,6 +8089,7 @@ BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs) BdrvChild *bdrv_primary_child(BlockDriverState *bs) { BdrvChild *c, *found = NULL; + IO_CODE(); QLIST_FOREACH(c, &bs->children, next) { if (c->role & BDRV_CHILD_PRIMARY) { @@ -7860,6 +8142,7 @@ static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs, */ BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); return bdrv_do_skip_filters(bs, true); } @@ -7869,6 +8152,7 @@ BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs) */ BlockDriverState *bdrv_skip_filters(BlockDriverState *bs) { + IO_CODE(); return bdrv_do_skip_filters(bs, false); } @@ -7878,6 +8162,7 @@ BlockDriverState *bdrv_skip_filters(BlockDriverState *bs) */ BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs) { + IO_CODE(); return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs))); } @@ -7913,8 +8198,8 @@ static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs, */ bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum) { + IO_CODE(); RCU_READ_LOCK_GUARD(); - return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum); } @@ -7924,6 +8209,7 @@ bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum) void 
bdrv_bsc_invalidate_range(BlockDriverState *bs, int64_t offset, int64_t bytes) { + IO_CODE(); RCU_READ_LOCK_GUARD(); if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) { @@ -7938,6 +8224,7 @@ void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes) { BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1); BdrvBlockStatusCache *old_bsc; + IO_CODE(); *new_bsc = (BdrvBlockStatusCache) { .valid = true, diff --git a/block/amend.c b/block/amend.c index 392df9ef83..f696a006e3 100644 --- a/block/amend.c +++ b/block/amend.c @@ -53,10 +53,31 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error **errp) return ret; } +static int blockdev_amend_pre_run(BlockdevAmendJob *s, Error **errp) +{ + if (s->bs->drv->bdrv_amend_pre_run) { + return s->bs->drv->bdrv_amend_pre_run(s->bs, errp); + } + + return 0; +} + +static void blockdev_amend_free(Job *job) +{ + BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common); + + if (s->bs->drv->bdrv_amend_clean) { + s->bs->drv->bdrv_amend_clean(s->bs); + } + + bdrv_unref(s->bs); +} + static const JobDriver blockdev_amend_job_driver = { .instance_size = sizeof(BlockdevAmendJob), .job_type = JOB_TYPE_AMEND, .run = blockdev_amend_run, + .free = blockdev_amend_free, }; void qmp_x_blockdev_amend(const char *job_id, @@ -110,8 +131,15 @@ void qmp_x_blockdev_amend(const char *job_id, return; } + bdrv_ref(bs); s->bs = bs, s->opts = QAPI_CLONE(BlockdevAmendOptions, options), s->force = has_force ? 
force : false; + + if (blockdev_amend_pre_run(s, errp)) { + job_early_fail(&s->common); + return; + } + job_start(&s->common); } diff --git a/block/backup.c b/block/backup.c index 21d5983779..5cfd0b999c 100644 --- a/block/backup.c +++ b/block/backup.c @@ -372,6 +372,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, assert(bs); assert(target); + GLOBAL_STATE_CODE(); /* QMP interface protects us from these cases */ assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL); diff --git a/block/blkverify.c b/block/blkverify.c index d1facf5ba9..53ba1c9195 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -16,6 +16,7 @@ #include "qemu/cutils.h" #include "qemu/module.h" #include "qemu/option.h" +#include "qemu/memalign.h" typedef struct { BdrvChild *test_file; diff --git a/block/block-backend.c b/block/block-backend.c index 4ff6b4d785..e0e1aff4b1 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -79,6 +79,7 @@ struct BlockBackend { bool allow_aio_context_change; bool allow_write_beyond_eof; + /* Protected by BQL */ NotifierList remove_bs_notifiers, insert_bs_notifiers; QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers; @@ -111,12 +112,14 @@ static const AIOCBInfo block_backend_aiocb_info = { static void drive_info_del(DriveInfo *dinfo); static BlockBackend *bdrv_first_blk(BlockDriverState *bs); -/* All BlockBackends */ +/* All BlockBackends. Protected by BQL. */ static QTAILQ_HEAD(, BlockBackend) block_backends = QTAILQ_HEAD_INITIALIZER(block_backends); -/* All BlockBackends referenced by the monitor and which are iterated through by - * blk_next() */ +/* + * All BlockBackends referenced by the monitor and which are iterated through by + * blk_next(). Protected by BQL. 
+ */ static QTAILQ_HEAD(, BlockBackend) monitor_block_backends = QTAILQ_HEAD_INITIALIZER(monitor_block_backends); @@ -236,6 +239,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp) void blk_set_force_allow_inactivate(BlockBackend *blk) { + GLOBAL_STATE_CODE(); blk->force_allow_inactivate = true; } @@ -354,6 +358,8 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) { BlockBackend *blk; + GLOBAL_STATE_CODE(); + blk = g_new0(BlockBackend, 1); blk->refcnt = 1; blk->ctx = ctx; @@ -391,6 +397,8 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, { BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm); + GLOBAL_STATE_CODE(); + if (blk_insert_bs(blk, bs, errp) < 0) { blk_unref(blk); return NULL; @@ -419,6 +427,8 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, uint64_t perm = 0; uint64_t shared = BLK_PERM_ALL; + GLOBAL_STATE_CODE(); + /* * blk_new_open() is mainly used in .bdrv_create implementations and the * tools where sharing isn't a major concern because the BDS stays private @@ -496,6 +506,7 @@ static void drive_info_del(DriveInfo *dinfo) int blk_get_refcnt(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk ? blk->refcnt : 0; } @@ -506,6 +517,7 @@ int blk_get_refcnt(BlockBackend *blk) void blk_ref(BlockBackend *blk) { assert(blk->refcnt > 0); + GLOBAL_STATE_CODE(); blk->refcnt++; } @@ -516,6 +528,7 @@ void blk_ref(BlockBackend *blk) */ void blk_unref(BlockBackend *blk) { + GLOBAL_STATE_CODE(); if (blk) { assert(blk->refcnt > 0); if (blk->refcnt > 1) { @@ -536,6 +549,7 @@ void blk_unref(BlockBackend *blk) */ BlockBackend *blk_all_next(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk ? 
QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&block_backends); } @@ -544,6 +558,8 @@ void blk_remove_all_bs(void) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); + while ((blk = blk_all_next(blk)) != NULL) { AioContext *ctx = blk_get_aio_context(blk); @@ -567,6 +583,7 @@ void blk_remove_all_bs(void) */ BlockBackend *blk_next(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk ? QTAILQ_NEXT(blk, monitor_link) : QTAILQ_FIRST(&monitor_block_backends); } @@ -633,6 +650,7 @@ static void bdrv_next_reset(BdrvNextIterator *it) BlockDriverState *bdrv_first(BdrvNextIterator *it) { + GLOBAL_STATE_CODE(); bdrv_next_reset(it); return bdrv_next(it); } @@ -670,6 +688,7 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp) { assert(!blk->name); assert(name && name[0]); + GLOBAL_STATE_CODE(); if (!id_wellformed(name)) { error_setg(errp, "Invalid device name"); @@ -697,6 +716,8 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp) */ void monitor_remove_blk(BlockBackend *blk) { + GLOBAL_STATE_CODE(); + if (!blk->name) { return; } @@ -712,6 +733,7 @@ void monitor_remove_blk(BlockBackend *blk) */ const char *blk_name(const BlockBackend *blk) { + IO_CODE(); return blk->name ?: ""; } @@ -723,6 +745,7 @@ BlockBackend *blk_by_name(const char *name) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); assert(name); while ((blk = blk_next(blk)) != NULL) { if (!strcmp(name, blk->name)) { @@ -737,12 +760,16 @@ BlockBackend *blk_by_name(const char *name) */ BlockDriverState *blk_bs(BlockBackend *blk) { + IO_CODE(); return blk->root ? 
blk->root->bs : NULL; } static BlockBackend *bdrv_first_blk(BlockDriverState *bs) { BdrvChild *child; + + GLOBAL_STATE_CODE(); + QLIST_FOREACH(child, &bs->parents, next_parent) { if (child->klass == &child_root) { return child->opaque; @@ -757,6 +784,7 @@ static BlockBackend *bdrv_first_blk(BlockDriverState *bs) */ bool bdrv_has_blk(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); return bdrv_first_blk(bs) != NULL; } @@ -767,6 +795,7 @@ bool bdrv_is_root_node(BlockDriverState *bs) { BdrvChild *c; + GLOBAL_STATE_CODE(); QLIST_FOREACH(c, &bs->parents, next_parent) { if (c->klass != &child_root) { return false; @@ -781,6 +810,7 @@ bool bdrv_is_root_node(BlockDriverState *bs) */ DriveInfo *blk_legacy_dinfo(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->legacy_dinfo; } @@ -792,6 +822,7 @@ DriveInfo *blk_legacy_dinfo(BlockBackend *blk) DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo) { assert(!blk->legacy_dinfo); + GLOBAL_STATE_CODE(); return blk->legacy_dinfo = dinfo; } @@ -802,6 +833,7 @@ DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo) BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); while ((blk = blk_next(blk)) != NULL) { if (blk->legacy_dinfo == dinfo) { @@ -816,6 +848,7 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo) */ BlockBackendPublic *blk_get_public(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return &blk->public; } @@ -824,6 +857,7 @@ BlockBackendPublic *blk_get_public(BlockBackend *blk) */ BlockBackend *blk_by_public(BlockBackendPublic *public) { + GLOBAL_STATE_CODE(); return container_of(public, BlockBackend, public); } @@ -835,6 +869,8 @@ void blk_remove_bs(BlockBackend *blk) ThrottleGroupMember *tgm = &blk->public.throttle_group_member; BdrvChild *root; + GLOBAL_STATE_CODE(); + notifier_list_notify(&blk->remove_bs_notifiers, blk); if (tgm->throttle_state) { BlockDriverState *bs = blk_bs(blk); @@ -869,6 +905,7 @@ void blk_remove_bs(BlockBackend *blk) 
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + GLOBAL_STATE_CODE(); bdrv_ref(bs); blk->root = bdrv_root_attach_child(bs, "root", &child_root, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, @@ -892,6 +929,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) */ int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp) { + GLOBAL_STATE_CODE(); return bdrv_replace_child_bs(blk->root, new_bs, errp); } @@ -902,6 +940,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, Error **errp) { int ret; + GLOBAL_STATE_CODE(); if (blk->root && !blk->disable_perm) { ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp); @@ -918,6 +957,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) { + GLOBAL_STATE_CODE(); *perm = blk->perm; *shared_perm = blk->shared_perm; } @@ -928,6 +968,7 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) */ int blk_attach_dev(BlockBackend *blk, DeviceState *dev) { + GLOBAL_STATE_CODE(); if (blk->dev) { return -EBUSY; } @@ -953,6 +994,7 @@ int blk_attach_dev(BlockBackend *blk, DeviceState *dev) void blk_detach_dev(BlockBackend *blk, DeviceState *dev) { assert(blk->dev == dev); + GLOBAL_STATE_CODE(); blk->dev = NULL; blk->dev_ops = NULL; blk->dev_opaque = NULL; @@ -966,6 +1008,7 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev) */ DeviceState *blk_get_attached_dev(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->dev; } @@ -974,6 +1017,7 @@ DeviceState *blk_get_attached_dev(BlockBackend *blk) char *blk_get_attached_dev_id(BlockBackend *blk) { DeviceState *dev = blk->dev; + IO_CODE(); if (!dev) { return g_strdup(""); @@ -994,6 +1038,8 @@ BlockBackend *blk_by_dev(void *dev) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); + assert(dev != NULL); 
while ((blk = blk_all_next(blk)) != NULL) { if (blk->dev == dev) { @@ -1011,6 +1057,7 @@ BlockBackend *blk_by_dev(void *dev) void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque) { + GLOBAL_STATE_CODE(); blk->dev_ops = ops; blk->dev_opaque = opaque; @@ -1032,6 +1079,7 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, */ void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp) { + GLOBAL_STATE_CODE(); if (blk->dev_ops && blk->dev_ops->change_media_cb) { bool tray_was_open, tray_is_open; Error *local_err = NULL; @@ -1064,6 +1112,7 @@ static void blk_root_change_media(BdrvChild *child, bool load) */ bool blk_dev_has_removable_media(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb); } @@ -1072,6 +1121,7 @@ bool blk_dev_has_removable_media(BlockBackend *blk) */ bool blk_dev_has_tray(BlockBackend *blk) { + IO_CODE(); return blk->dev_ops && blk->dev_ops->is_tray_open; } @@ -1081,6 +1131,7 @@ bool blk_dev_has_tray(BlockBackend *blk) */ void blk_dev_eject_request(BlockBackend *blk, bool force) { + GLOBAL_STATE_CODE(); if (blk->dev_ops && blk->dev_ops->eject_request_cb) { blk->dev_ops->eject_request_cb(blk->dev_opaque, force); } @@ -1091,6 +1142,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force) */ bool blk_dev_is_tray_open(BlockBackend *blk) { + IO_CODE(); if (blk_dev_has_tray(blk)) { return blk->dev_ops->is_tray_open(blk->dev_opaque); } @@ -1103,6 +1155,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk) */ bool blk_dev_is_medium_locked(BlockBackend *blk) { + GLOBAL_STATE_CODE(); if (blk->dev_ops && blk->dev_ops->is_medium_locked) { return blk->dev_ops->is_medium_locked(blk->dev_opaque); } @@ -1123,6 +1176,7 @@ static void blk_root_resize(BdrvChild *child) void blk_iostatus_enable(BlockBackend *blk) { + GLOBAL_STATE_CODE(); blk->iostatus_enabled = true; blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; } @@ -1131,6 +1185,7 @@ void 
blk_iostatus_enable(BlockBackend *blk) * enables it _and_ the VM is configured to stop on errors */ bool blk_iostatus_is_enabled(const BlockBackend *blk) { + IO_CODE(); return (blk->iostatus_enabled && (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || blk->on_write_error == BLOCKDEV_ON_ERROR_STOP || @@ -1139,16 +1194,19 @@ bool blk_iostatus_is_enabled(const BlockBackend *blk) BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->iostatus; } void blk_iostatus_disable(BlockBackend *blk) { + GLOBAL_STATE_CODE(); blk->iostatus_enabled = false; } void blk_iostatus_reset(BlockBackend *blk) { + GLOBAL_STATE_CODE(); if (blk_iostatus_is_enabled(blk)) { blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; } @@ -1156,6 +1214,7 @@ void blk_iostatus_reset(BlockBackend *blk) void blk_iostatus_set_err(BlockBackend *blk, int error) { + IO_CODE(); assert(blk_iostatus_is_enabled(blk)); if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : @@ -1165,16 +1224,19 @@ void blk_iostatus_set_err(BlockBackend *blk, int error) void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow) { + IO_CODE(); blk->allow_write_beyond_eof = allow; } void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow) { + IO_CODE(); blk->allow_aio_context_change = allow; } void blk_set_disable_request_queuing(BlockBackend *blk, bool disable) { + IO_CODE(); blk->disable_request_queuing = disable; } @@ -1228,6 +1290,7 @@ blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, { int ret; BlockDriverState *bs; + IO_CODE(); blk_wait_while_drained(blk); @@ -1258,6 +1321,7 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, BdrvRequestFlags flags) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_co_do_preadv(blk, offset, bytes, qiov, flags); @@ -1274,6 +1338,7 @@ blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, { int ret; BlockDriverState *bs; 
+ IO_CODE(); blk_wait_while_drained(blk); @@ -1309,6 +1374,7 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, BdrvRequestFlags flags) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); @@ -1321,6 +1387,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { + IO_OR_GS_CODE(); return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags); } @@ -1349,22 +1416,26 @@ typedef struct BlkRwCo { int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, int64_t bytes, BdrvRequestFlags flags) { + IO_OR_GS_CODE(); return blk_pwritev_part(blk, offset, bytes, NULL, 0, flags | BDRV_REQ_ZERO_WRITE); } int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags) { + GLOBAL_STATE_CODE(); return bdrv_make_zero(blk->root, flags); } void blk_inc_in_flight(BlockBackend *blk) { + IO_CODE(); qatomic_inc(&blk->in_flight); } void blk_dec_in_flight(BlockBackend *blk) { + IO_CODE(); qatomic_dec(&blk->in_flight); aio_wait_kick(); } @@ -1383,6 +1454,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, void *opaque, int ret) { struct BlockBackendAIOCB *acb; + IO_CODE(); blk_inc_in_flight(blk); acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque); @@ -1490,6 +1562,7 @@ BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, int64_t bytes, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE, cb, opaque); } @@ -1498,6 +1571,7 @@ int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes) { int ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_do_preadv(blk, offset, bytes, &qiov, 0); @@ -1511,6 +1585,7 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, { int ret; 
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); ret = blk_pwritev_part(blk, offset, bytes, &qiov, 0, flags); @@ -1519,6 +1594,7 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, int64_t blk_getlength(BlockBackend *blk) { + IO_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -1528,6 +1604,7 @@ int64_t blk_getlength(BlockBackend *blk) void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr) { + IO_CODE(); if (!blk_bs(blk)) { *nb_sectors_ptr = 0; } else { @@ -1537,6 +1614,7 @@ void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr) int64_t blk_nb_sectors(BlockBackend *blk) { + IO_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -1548,6 +1626,7 @@ BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); assert((uint64_t)qiov->size <= INT64_MAX); return blk_aio_prwv(blk, offset, qiov->size, qiov, blk_aio_read_entry, flags, cb, opaque); @@ -1557,6 +1636,7 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); assert((uint64_t)qiov->size <= INT64_MAX); return blk_aio_prwv(blk, offset, qiov->size, qiov, blk_aio_write_entry, flags, cb, opaque); @@ -1564,11 +1644,13 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, void blk_aio_cancel(BlockAIOCB *acb) { + GLOBAL_STATE_CODE(); bdrv_aio_cancel(acb); } void blk_aio_cancel_async(BlockAIOCB *acb) { + IO_CODE(); bdrv_aio_cancel_async(acb); } @@ -1576,6 +1658,8 @@ void blk_aio_cancel_async(BlockAIOCB *acb) int coroutine_fn blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { + IO_CODE(); + blk_wait_while_drained(blk); if (!blk_is_available(blk)) { @@ -1588,6 +1672,7 @@ blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) int blk_ioctl(BlockBackend *blk, unsigned long 
int req, void *buf) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_do_ioctl(blk, req, buf); @@ -1609,6 +1694,7 @@ static void blk_aio_ioctl_entry(void *opaque) BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); } @@ -1617,6 +1703,7 @@ int coroutine_fn blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) { int ret; + IO_CODE(); blk_wait_while_drained(blk); @@ -1641,6 +1728,7 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, cb, opaque); } @@ -1649,6 +1737,7 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_co_do_pdiscard(blk, offset, bytes); @@ -1660,6 +1749,7 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_do_pdiscard(blk, offset, bytes); @@ -1672,6 +1762,7 @@ int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) int coroutine_fn blk_co_do_flush(BlockBackend *blk) { blk_wait_while_drained(blk); + IO_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; @@ -1692,12 +1783,14 @@ static void blk_aio_flush_entry(void *opaque) BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); } int coroutine_fn blk_co_flush(BlockBackend *blk) { int ret; + IO_OR_GS_CODE(); blk_inc_in_flight(blk); ret = blk_co_do_flush(blk); @@ -1720,6 +1813,7 @@ int blk_flush(BlockBackend *blk) void blk_drain(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + 
GLOBAL_STATE_CODE(); if (bs) { bdrv_ref(bs); @@ -1740,6 +1834,8 @@ void blk_drain_all(void) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); + bdrv_drain_all_begin(); while ((blk = blk_all_next(blk)) != NULL) { @@ -1759,12 +1855,14 @@ void blk_drain_all(void) void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, BlockdevOnError on_write_error) { + GLOBAL_STATE_CODE(); blk->on_read_error = on_read_error; blk->on_write_error = on_write_error; } BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read) { + IO_CODE(); return is_read ? blk->on_read_error : blk->on_write_error; } @@ -1772,6 +1870,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, int error) { BlockdevOnError on_err = blk_get_on_error(blk, is_read); + IO_CODE(); switch (on_err) { case BLOCKDEV_ON_ERROR_ENOSPC: @@ -1811,6 +1910,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, bool is_read, int error) { assert(error >= 0); + IO_CODE(); if (action == BLOCK_ERROR_ACTION_STOP) { /* First set the iostatus, so that "info block" returns an iostatus @@ -1842,6 +1942,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, bool blk_supports_write_perm(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { return !bdrv_is_read_only(bs); @@ -1856,12 +1957,14 @@ bool blk_supports_write_perm(BlockBackend *blk) */ bool blk_is_writable(BlockBackend *blk) { + IO_CODE(); return blk->perm & BLK_PERM_WRITE; } bool blk_is_sg(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (!bs) { return false; @@ -1872,41 +1975,47 @@ bool blk_is_sg(BlockBackend *blk) bool blk_enable_write_cache(BlockBackend *blk) { + IO_CODE(); return blk->enable_write_cache; } void blk_set_enable_write_cache(BlockBackend *blk, bool wce) { + GLOBAL_STATE_CODE(); blk->enable_write_cache = wce; } -void blk_invalidate_cache(BlockBackend *blk, Error **errp) +void blk_activate(BlockBackend *blk, Error 
**errp) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (!bs) { error_setg(errp, "Device '%s' has no medium", blk->name); return; } - bdrv_invalidate_cache(bs, errp); + bdrv_activate(bs, errp); } bool blk_is_inserted(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); return bs && bdrv_is_inserted(bs); } bool blk_is_available(BlockBackend *blk) { + IO_CODE(); return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk); } void blk_lock_medium(BlockBackend *blk, bool locked) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); if (bs) { bdrv_lock_medium(bs, locked); @@ -1917,6 +2026,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag) { BlockDriverState *bs = blk_bs(blk); char *id; + IO_CODE(); if (bs) { bdrv_eject(bs, eject_flag); @@ -1933,6 +2043,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag) int blk_get_flags(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { return bdrv_get_flags(bs); @@ -1945,6 +2056,7 @@ int blk_get_flags(BlockBackend *blk) uint32_t blk_get_request_alignment(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); return bs ? 
bs->bl.request_alignment : BDRV_SECTOR_SIZE; } @@ -1953,6 +2065,7 @@ uint64_t blk_get_max_hw_transfer(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); uint64_t max = INT_MAX; + IO_CODE(); if (bs) { max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer); @@ -1966,6 +2079,7 @@ uint32_t blk_get_max_transfer(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); uint32_t max = INT_MAX; + IO_CODE(); if (bs) { max = MIN_NON_ZERO(max, bs->bl.max_transfer); @@ -1975,33 +2089,39 @@ uint32_t blk_get_max_transfer(BlockBackend *blk) int blk_get_max_hw_iov(BlockBackend *blk) { + IO_CODE(); return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov, blk->root->bs->bl.max_iov); } int blk_get_max_iov(BlockBackend *blk) { + IO_CODE(); return blk->root->bs->bl.max_iov; } void blk_set_guest_block_size(BlockBackend *blk, int align) { + IO_CODE(); blk->guest_block_size = align; } void *blk_try_blockalign(BlockBackend *blk, size_t size) { + IO_CODE(); return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size); } void *blk_blockalign(BlockBackend *blk, size_t size) { + IO_CODE(); return qemu_blockalign(blk ? 
blk_bs(blk) : NULL, size); } bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (!bs) { return false; @@ -2013,6 +2133,7 @@ bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp) void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { bdrv_op_unblock(bs, op, reason); @@ -2022,6 +2143,7 @@ void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason) void blk_op_block_all(BlockBackend *blk, Error *reason) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { bdrv_op_block_all(bs, reason); @@ -2031,6 +2153,7 @@ void blk_op_block_all(BlockBackend *blk, Error *reason) void blk_op_unblock_all(BlockBackend *blk, Error *reason) { BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); if (bs) { bdrv_op_unblock_all(bs, reason); @@ -2040,6 +2163,7 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason) AioContext *blk_get_aio_context(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); if (bs) { AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); @@ -2090,6 +2214,7 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, Error **errp) { + GLOBAL_STATE_CODE(); return blk_do_set_aio_context(blk, new_context, true, errp); } @@ -2126,6 +2251,7 @@ void blk_add_aio_context_notifier(BlockBackend *blk, { BlockBackendAioNotifier *notifier; BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); notifier = g_new(BlockBackendAioNotifier, 1); notifier->attached_aio_context = attached_aio_context; @@ -2148,6 +2274,8 @@ void blk_remove_aio_context_notifier(BlockBackend *blk, BlockBackendAioNotifier *notifier; BlockDriverState *bs = blk_bs(blk); + GLOBAL_STATE_CODE(); + if (bs) { bdrv_remove_aio_context_notifier(bs, attached_aio_context, detach_aio_context, 
opaque); @@ -2168,17 +2296,20 @@ void blk_remove_aio_context_notifier(BlockBackend *blk, void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify) { + GLOBAL_STATE_CODE(); notifier_list_add(&blk->remove_bs_notifiers, notify); } void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify) { + GLOBAL_STATE_CODE(); notifier_list_add(&blk->insert_bs_notifiers, notify); } void blk_io_plug(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); if (bs) { bdrv_io_plug(bs); @@ -2188,6 +2319,7 @@ void blk_io_plug(BlockBackend *blk) void blk_io_unplug(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); + IO_CODE(); if (bs) { bdrv_io_unplug(bs); @@ -2196,18 +2328,21 @@ void blk_io_unplug(BlockBackend *blk) BlockAcctStats *blk_get_stats(BlockBackend *blk) { + IO_CODE(); return &blk->stats; } void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, BlockCompletionFunc *cb, void *opaque) { + IO_CODE(); return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque); } int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, int64_t bytes, BdrvRequestFlags flags) { + IO_OR_GS_CODE(); return blk_co_pwritev(blk, offset, bytes, NULL, flags | BDRV_REQ_ZERO_WRITE); } @@ -2216,6 +2351,7 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, int64_t bytes) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); return blk_pwritev_part(blk, offset, bytes, &qiov, 0, BDRV_REQ_WRITE_COMPRESSED); } @@ -2223,6 +2359,7 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) { + IO_OR_GS_CODE(); if (!blk_is_available(blk)) { error_setg(errp, "No medium inserted"); return -ENOMEDIUM; @@ -2235,6 +2372,7 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int64_t pos, int size) { int ret; + GLOBAL_STATE_CODE(); if 
(!blk_is_available(blk)) { return -ENOMEDIUM; @@ -2254,6 +2392,7 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size) { + GLOBAL_STATE_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -2263,6 +2402,7 @@ int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size) int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz) { + GLOBAL_STATE_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -2272,6 +2412,7 @@ int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz) int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo) { + GLOBAL_STATE_CODE(); if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -2285,6 +2426,7 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo) */ void blk_update_root_state(BlockBackend *blk) { + GLOBAL_STATE_CODE(); assert(blk->root); blk->root_state.open_flags = blk->root->bs->open_flags; @@ -2297,6 +2439,7 @@ void blk_update_root_state(BlockBackend *blk) */ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->root_state.detect_zeroes; } @@ -2306,17 +2449,20 @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) */ int blk_get_open_flags_from_root_state(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->root_state.open_flags; } BlockBackendRootState *blk_get_root_state(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return &blk->root_state; } int blk_commit_all(void) { BlockBackend *blk = NULL; + GLOBAL_STATE_CODE(); while ((blk = blk_all_next(blk)) != NULL) { AioContext *aio_context = blk_get_aio_context(blk); @@ -2341,6 +2487,7 @@ int blk_commit_all(void) /* throttling disk I/O limits */ void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg) { + GLOBAL_STATE_CODE(); throttle_group_config(&blk->public.throttle_group_member, cfg); } @@ -2349,6 +2496,7 @@ void blk_io_limits_disable(BlockBackend *blk) BlockDriverState *bs = blk_bs(blk); 
ThrottleGroupMember *tgm = &blk->public.throttle_group_member; assert(tgm->throttle_state); + GLOBAL_STATE_CODE(); if (bs) { bdrv_ref(bs); bdrv_drained_begin(bs); @@ -2364,12 +2512,14 @@ void blk_io_limits_disable(BlockBackend *blk) void blk_io_limits_enable(BlockBackend *blk, const char *group) { assert(!blk->public.throttle_group_member.throttle_state); + GLOBAL_STATE_CODE(); throttle_group_register_tgm(&blk->public.throttle_group_member, group, blk_get_aio_context(blk)); } void blk_io_limits_update_group(BlockBackend *blk, const char *group) { + GLOBAL_STATE_CODE(); /* this BB is not part of any group */ if (!blk->public.throttle_group_member.throttle_state) { return; @@ -2437,11 +2587,13 @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) void blk_register_buf(BlockBackend *blk, void *host, size_t size) { + GLOBAL_STATE_CODE(); bdrv_register_buf(blk_bs(blk), host, size); } void blk_unregister_buf(BlockBackend *blk, void *host) { + GLOBAL_STATE_CODE(); bdrv_unregister_buf(blk_bs(blk), host); } @@ -2451,6 +2603,8 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, BdrvRequestFlags write_flags) { int r; + IO_CODE(); + r = blk_check_byte_request(blk_in, off_in, bytes); if (r) { return r; @@ -2466,11 +2620,13 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, const BdrvChild *blk_root(BlockBackend *blk) { + GLOBAL_STATE_CODE(); return blk->root; } int blk_make_empty(BlockBackend *blk, Error **errp) { + GLOBAL_STATE_CODE(); if (!blk_is_available(blk)) { error_setg(errp, "No medium inserted"); return -ENOMEDIUM; diff --git a/block/block-copy.c b/block/block-copy.c index ce116318b5..ec46775ea5 100644 --- a/block/block-copy.c +++ b/block/block-copy.c @@ -17,11 +17,13 @@ #include "trace.h" #include "qapi/error.h" #include "block/block-copy.h" +#include "block/reqlist.h" #include "sysemu/block-backend.h" #include "qemu/units.h" #include "qemu/coroutine.h" #include "block/aio_task.h" #include 
"qemu/error-report.h" +#include "qemu/memalign.h" #define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB) #define BLOCK_COPY_MAX_BUFFER (1 * MiB) @@ -83,7 +85,6 @@ typedef struct BlockCopyTask { */ BlockCopyState *s; BlockCopyCallState *call_state; - int64_t offset; /* * @method can also be set again in the while loop of * block_copy_dirty_clusters(), but it is never accessed concurrently @@ -94,21 +95,17 @@ typedef struct BlockCopyTask { BlockCopyMethod method; /* - * Fields whose state changes throughout the execution - * Protected by lock in BlockCopyState. - */ - CoQueue wait_queue; /* coroutines blocked on this task */ - /* - * Only protect the case of parallel read while updating @bytes - * value in block_copy_task_shrink(). + * Generally, req is protected by lock in BlockCopyState, Still req.offset + * is only set on task creation, so may be read concurrently after creation. + * req.bytes is changed at most once, and need only protecting the case of + * parallel read while updating @bytes value in block_copy_task_shrink(). */ - int64_t bytes; - QLIST_ENTRY(BlockCopyTask) list; + BlockReq req; } BlockCopyTask; static int64_t task_end(BlockCopyTask *task) { - return task->offset + task->bytes; + return task->req.offset + task->req.bytes; } typedef struct BlockCopyState { @@ -136,7 +133,7 @@ typedef struct BlockCopyState { CoMutex lock; int64_t in_flight_bytes; BlockCopyMethod method; - QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */ + BlockReqList reqs; QLIST_HEAD(, BlockCopyCallState) calls; /* * skip_unallocated: @@ -161,42 +158,6 @@ typedef struct BlockCopyState { } BlockCopyState; /* Called with lock held */ -static BlockCopyTask *find_conflicting_task(BlockCopyState *s, - int64_t offset, int64_t bytes) -{ - BlockCopyTask *t; - - QLIST_FOREACH(t, &s->tasks, list) { - if (offset + bytes > t->offset && offset < t->offset + t->bytes) { - return t; - } - } - - return NULL; -} - -/* - * If there are no intersecting tasks return false. 
Otherwise, wait for the - * first found intersecting tasks to finish and return true. - * - * Called with lock held. May temporary release the lock. - * Return value of 0 proves that lock was NOT released. - */ -static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset, - int64_t bytes) -{ - BlockCopyTask *task = find_conflicting_task(s, offset, bytes); - - if (!task) { - return false; - } - - qemu_co_queue_wait(&task->wait_queue, &s->lock); - - return true; -} - -/* Called with lock held */ static int64_t block_copy_chunk_size(BlockCopyState *s) { switch (s->method) { @@ -239,7 +200,7 @@ block_copy_task_create(BlockCopyState *s, BlockCopyCallState *call_state, bytes = QEMU_ALIGN_UP(bytes, s->cluster_size); /* region is dirty, so no existent tasks possible in it */ - assert(!find_conflicting_task(s, offset, bytes)); + assert(!reqlist_find_conflict(&s->reqs, offset, bytes)); bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes); s->in_flight_bytes += bytes; @@ -249,12 +210,9 @@ block_copy_task_create(BlockCopyState *s, BlockCopyCallState *call_state, .task.func = block_copy_task_entry, .s = s, .call_state = call_state, - .offset = offset, - .bytes = bytes, .method = s->method, }; - qemu_co_queue_init(&task->wait_queue); - QLIST_INSERT_HEAD(&s->tasks, task, list); + reqlist_init_req(&s->reqs, &task->req, offset, bytes); return task; } @@ -270,34 +228,34 @@ static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task, int64_t new_bytes) { QEMU_LOCK_GUARD(&task->s->lock); - if (new_bytes == task->bytes) { + if (new_bytes == task->req.bytes) { return; } - assert(new_bytes > 0 && new_bytes < task->bytes); + assert(new_bytes > 0 && new_bytes < task->req.bytes); - task->s->in_flight_bytes -= task->bytes - new_bytes; + task->s->in_flight_bytes -= task->req.bytes - new_bytes; bdrv_set_dirty_bitmap(task->s->copy_bitmap, - task->offset + new_bytes, task->bytes - new_bytes); + task->req.offset + new_bytes, + task->req.bytes - new_bytes); - 
task->bytes = new_bytes; - qemu_co_queue_restart_all(&task->wait_queue); + reqlist_shrink_req(&task->req, new_bytes); } static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret) { QEMU_LOCK_GUARD(&task->s->lock); - task->s->in_flight_bytes -= task->bytes; + task->s->in_flight_bytes -= task->req.bytes; if (ret < 0) { - bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes); + bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->req.offset, + task->req.bytes); } - QLIST_REMOVE(task, list); if (task->s->progress) { progress_set_remaining(task->s->progress, bdrv_get_dirty_count(task->s->copy_bitmap) + task->s->in_flight_bytes); } - qemu_co_queue_restart_all(&task->wait_queue); + reqlist_remove_req(&task->req); } void block_copy_state_free(BlockCopyState *s) @@ -384,8 +342,10 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target, } BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, + const BdrvDirtyBitmap *bitmap, Error **errp) { + ERRP_GUARD(); BlockCopyState *s; int64_t cluster_size; BdrvDirtyBitmap *copy_bitmap; @@ -402,6 +362,17 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, return NULL; } bdrv_disable_dirty_bitmap(copy_bitmap); + if (bitmap) { + if (!bdrv_merge_dirty_bitmap(copy_bitmap, bitmap, NULL, errp)) { + error_prepend(errp, "Failed to merge bitmap '%s' to internal " + "copy-bitmap: ", bdrv_dirty_bitmap_name(bitmap)); + bdrv_release_dirty_bitmap(copy_bitmap); + return NULL; + } + } else { + bdrv_set_dirty_bitmap(copy_bitmap, 0, + bdrv_dirty_bitmap_size(copy_bitmap)); + } /* * If source is in backing chain of target assume that target is going to be @@ -437,7 +408,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, ratelimit_init(&s->rate_limit); qemu_co_mutex_init(&s->lock); - QLIST_INIT(&s->tasks); + QLIST_INIT(&s->reqs); QLIST_INIT(&s->calls); return s; @@ -470,7 +441,7 @@ static coroutine_fn int 
block_copy_task_run(AioTaskPool *pool, aio_task_pool_wait_slot(pool); if (aio_task_pool_status(pool) < 0) { - co_put_to_shres(task->s->mem, task->bytes); + co_put_to_shres(task->s->mem, task->req.bytes); block_copy_task_end(task, -ECANCELED); g_free(task); return -ECANCELED; @@ -583,7 +554,8 @@ static coroutine_fn int block_copy_task_entry(AioTask *task) BlockCopyMethod method = t->method; int ret; - ret = block_copy_do_copy(s, t->offset, t->bytes, &method, &error_is_read); + ret = block_copy_do_copy(s, t->req.offset, t->req.bytes, &method, + &error_is_read); WITH_QEMU_LOCK_GUARD(&s->lock) { if (s->method == t->method) { @@ -596,10 +568,10 @@ static coroutine_fn int block_copy_task_entry(AioTask *task) t->call_state->error_is_read = error_is_read; } } else if (s->progress) { - progress_work_done(s->progress, t->bytes); + progress_work_done(s->progress, t->req.bytes); } } - co_put_to_shres(s->mem, t->bytes); + co_put_to_shres(s->mem, t->req.bytes); block_copy_task_end(t, ret); return ret; @@ -679,6 +651,18 @@ static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset, } } +void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes) +{ + QEMU_LOCK_GUARD(&s->lock); + + bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes); + if (s->progress) { + progress_set_remaining(s->progress, + bdrv_get_dirty_count(s->copy_bitmap) + + s->in_flight_bytes); + } +} + /* * Reset bits in copy_bitmap starting at offset if they represent unallocated * data in the image. May reset subsequent contiguous bits. 
@@ -699,14 +683,7 @@ int64_t block_copy_reset_unallocated(BlockCopyState *s, bytes = clusters * s->cluster_size; if (!ret) { - qemu_co_mutex_lock(&s->lock); - bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes); - if (s->progress) { - progress_set_remaining(s->progress, - bdrv_get_dirty_count(s->copy_bitmap) + - s->in_flight_bytes); - } - qemu_co_mutex_unlock(&s->lock); + block_copy_reset(s, offset, bytes); } *count = bytes; @@ -753,22 +730,22 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) trace_block_copy_skip_range(s, offset, bytes); break; } - if (task->offset > offset) { - trace_block_copy_skip_range(s, offset, task->offset - offset); + if (task->req.offset > offset) { + trace_block_copy_skip_range(s, offset, task->req.offset - offset); } found_dirty = true; - ret = block_copy_block_status(s, task->offset, task->bytes, + ret = block_copy_block_status(s, task->req.offset, task->req.bytes, &status_bytes); assert(ret >= 0); /* never fail */ - if (status_bytes < task->bytes) { + if (status_bytes < task->req.bytes) { block_copy_task_shrink(task, status_bytes); } if (qatomic_read(&s->skip_unallocated) && !(ret & BDRV_BLOCK_ALLOCATED)) { block_copy_task_end(task, 0); - trace_block_copy_skip_range(s, task->offset, task->bytes); + trace_block_copy_skip_range(s, task->req.offset, task->req.bytes); offset = task_end(task); bytes = end - offset; g_free(task); @@ -789,11 +766,11 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) } } - ratelimit_calculate_delay(&s->rate_limit, task->bytes); + ratelimit_calculate_delay(&s->rate_limit, task->req.bytes); - trace_block_copy_process(s, task->offset); + trace_block_copy_process(s, task->req.offset); - co_get_from_shres(s->mem, task->bytes); + co_get_from_shres(s->mem, task->req.bytes); offset = task_end(task); bytes = end - offset; @@ -861,8 +838,8 @@ static int coroutine_fn block_copy_common(BlockCopyCallState *call_state) * Check that there is no task we still need to * wait to complete */ - ret = 
block_copy_wait_one(s, call_state->offset, - call_state->bytes); + ret = reqlist_wait_one(&s->reqs, call_state->offset, + call_state->bytes, &s->lock); if (ret == 0) { /* * No pending tasks, but check again the bitmap in this @@ -870,7 +847,7 @@ static int coroutine_fn block_copy_common(BlockCopyCallState *call_state) * between this and the critical section in * block_copy_dirty_clusters(). * - * block_copy_wait_one return value 0 also means that it + * reqlist_wait_one return value 0 also means that it * didn't release the lock. So, we are still in the same * critical section, not interrupted by any concurrent * access to state. diff --git a/block/commit.c b/block/commit.c index b1fc7b908b..851d1c557a 100644 --- a/block/commit.c +++ b/block/commit.c @@ -20,6 +20,7 @@ #include "qapi/error.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" +#include "qemu/memalign.h" #include "sysemu/block-backend.h" enum { @@ -253,6 +254,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, uint64_t base_perms, iter_shared_perms; int ret; + GLOBAL_STATE_CODE(); + assert(top != bs); if (bdrv_skip_filters(top) == bdrv_skip_filters(base)) { error_setg(errp, "Invalid files for merge: top and base are the same"); @@ -432,6 +435,8 @@ int bdrv_commit(BlockDriverState *bs) QEMU_AUTO_VFREE uint8_t *buf = NULL; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + if (!drv) return -ENOMEDIUM; diff --git a/block/copy-before-write.c b/block/copy-before-write.c index c30a5ff8de..a8a06fdc09 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -33,10 +33,37 @@ #include "block/block-copy.h" #include "block/copy-before-write.h" +#include "block/reqlist.h" + +#include "qapi/qapi-visit-block-core.h" typedef struct BDRVCopyBeforeWriteState { BlockCopyState *bcs; BdrvChild *target; + + /* + * @lock: protects access to @access_bitmap, @done_bitmap and + * @frozen_read_reqs + */ + CoMutex lock; + + /* + * @access_bitmap: represents areas allowed for reading by 
fleecing user. + * Reading from non-dirty areas leads to -EACCES. + */ + BdrvDirtyBitmap *access_bitmap; + + /* + * @done_bitmap: represents areas that was successfully copied to @target by + * copy-before-write operations. + */ + BdrvDirtyBitmap *done_bitmap; + + /* + * @frozen_read_reqs: current read requests for fleecing user in bs->file + * node. These areas must not be rewritten by guest. + */ + BlockReqList frozen_read_reqs; } BDRVCopyBeforeWriteState; static coroutine_fn int cbw_co_preadv( @@ -46,10 +73,20 @@ static coroutine_fn int cbw_co_preadv( return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); } +/* + * Do copy-before-write operation. + * + * On failure guest request must be failed too. + * + * On success, we also wait for all in-flight fleecing read requests in source + * node, and it's guaranteed that after cbw_do_copy_before_write() successful + * return there are no such requests and they will never appear. + */ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs, uint64_t offset, uint64_t bytes, BdrvRequestFlags flags) { BDRVCopyBeforeWriteState *s = bs->opaque; + int ret; uint64_t off, end; int64_t cluster_size = block_copy_cluster_size(s->bcs); @@ -60,7 +97,17 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs, off = QEMU_ALIGN_DOWN(offset, cluster_size); end = QEMU_ALIGN_UP(offset + bytes, cluster_size); - return block_copy(s->bcs, off, end - off, true); + ret = block_copy(s->bcs, off, end - off, true); + if (ret < 0) { + return ret; + } + + WITH_QEMU_LOCK_GUARD(&s->lock) { + bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off); + reqlist_wait_all(&s->frozen_read_reqs, off, end - off, &s->lock); + } + + return 0; } static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs, @@ -108,6 +155,142 @@ static int coroutine_fn cbw_co_flush(BlockDriverState *bs) return bdrv_co_flush(bs->file->bs); } +/* + * If @offset not accessible - return NULL. 
+ * + * Otherwise, set @pnum to some bytes that accessible from @file (@file is set + * to bs->file or to s->target). Return newly allocated BlockReq object that + * should be than passed to cbw_snapshot_read_unlock(). + * + * It's guaranteed that guest writes will not interact in the region until + * cbw_snapshot_read_unlock() called. + */ +static BlockReq *cbw_snapshot_read_lock(BlockDriverState *bs, + int64_t offset, int64_t bytes, + int64_t *pnum, BdrvChild **file) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + BlockReq *req = g_new(BlockReq, 1); + bool done; + + QEMU_LOCK_GUARD(&s->lock); + + if (bdrv_dirty_bitmap_next_zero(s->access_bitmap, offset, bytes) != -1) { + g_free(req); + return NULL; + } + + done = bdrv_dirty_bitmap_status(s->done_bitmap, offset, bytes, pnum); + if (done) { + /* + * Special invalid BlockReq, that is handled in + * cbw_snapshot_read_unlock(). We don't need to lock something to read + * from s->target. + */ + *req = (BlockReq) {.offset = -1, .bytes = -1}; + *file = s->target; + } else { + reqlist_init_req(&s->frozen_read_reqs, req, offset, bytes); + *file = bs->file; + } + + return req; +} + +static void cbw_snapshot_read_unlock(BlockDriverState *bs, BlockReq *req) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + + if (req->offset == -1 && req->bytes == -1) { + g_free(req); + return; + } + + QEMU_LOCK_GUARD(&s->lock); + + reqlist_remove_req(req); + g_free(req); +} + +static coroutine_fn int +cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset) +{ + BlockReq *req; + BdrvChild *file; + int ret; + + /* TODO: upgrade to async loop using AioTask */ + while (bytes) { + int64_t cur_bytes; + + req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &file); + if (!req) { + return -EACCES; + } + + ret = bdrv_co_preadv_part(file, offset, cur_bytes, + qiov, qiov_offset, 0); + cbw_snapshot_read_unlock(bs, req); + if (ret < 0) { + return ret; + } + + bytes -= cur_bytes; + offset 
+= cur_bytes; + qiov_offset += cur_bytes; + } + + return 0; +} + +static int coroutine_fn +cbw_co_snapshot_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + BlockReq *req; + int ret; + int64_t cur_bytes; + BdrvChild *child; + + req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &child); + if (!req) { + return -EACCES; + } + + ret = bdrv_block_status(child->bs, offset, cur_bytes, pnum, map, file); + if (child == s->target) { + /* + * We refer to s->target only for areas that we've written to it. + * And we can not report unallocated blocks in s->target: this will + * break generic block-status-above logic, that will go to + * copy-before-write filtered child in this case. + */ + assert(ret & BDRV_BLOCK_ALLOCATED); + } + + cbw_snapshot_read_unlock(bs, req); + + return ret; +} + +static int coroutine_fn cbw_co_pdiscard_snapshot(BlockDriverState *bs, + int64_t offset, int64_t bytes) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + + WITH_QEMU_LOCK_GUARD(&s->lock) { + bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes); + } + + block_copy_reset(s->bcs, offset, bytes); + + return bdrv_co_pdiscard(s->target, offset, bytes); +} + static void cbw_refresh_filename(BlockDriverState *bs) { pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), @@ -145,11 +328,54 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c, } } +static bool cbw_parse_bitmap_option(QDict *options, BdrvDirtyBitmap **bitmap, + Error **errp) +{ + QDict *bitmap_qdict = NULL; + BlockDirtyBitmap *bmp_param = NULL; + Visitor *v = NULL; + bool ret = false; + + *bitmap = NULL; + + qdict_extract_subqdict(options, &bitmap_qdict, "bitmap."); + if (!qdict_size(bitmap_qdict)) { + ret = true; + goto out; + } + + v = qobject_input_visitor_new_flat_confused(bitmap_qdict, errp); + if (!v) { + goto out; + } + + visit_type_BlockDirtyBitmap(v, NULL, 
&bmp_param, errp); + if (!bmp_param) { + goto out; + } + + *bitmap = block_dirty_bitmap_lookup(bmp_param->node, bmp_param->name, NULL, + errp); + if (!*bitmap) { + goto out; + } + + ret = true; + +out: + qapi_free_BlockDirtyBitmap(bmp_param); + visit_free(v); + qobject_unref(bitmap_qdict); + + return ret; +} + static int cbw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVCopyBeforeWriteState *s = bs->opaque; - BdrvDirtyBitmap *copy_bitmap; + BdrvDirtyBitmap *bitmap = NULL; + int64_t cluster_size; bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, @@ -164,6 +390,10 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } + if (!cbw_parse_bitmap_option(options, &bitmap, errp)) { + return -EINVAL; + } + bs->total_sectors = bs->file->bs->total_sectors; bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); @@ -171,14 +401,32 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & bs->file->bs->supported_zero_flags); - s->bcs = block_copy_state_new(bs->file, s->target, errp); + s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp); if (!s->bcs) { error_prepend(errp, "Cannot create block-copy-state: "); return -EINVAL; } - copy_bitmap = block_copy_dirty_bitmap(s->bcs); - bdrv_set_dirty_bitmap(copy_bitmap, 0, bdrv_dirty_bitmap_size(copy_bitmap)); + cluster_size = block_copy_cluster_size(s->bcs); + + s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); + if (!s->done_bitmap) { + return -EINVAL; + } + bdrv_disable_dirty_bitmap(s->done_bitmap); + + /* s->access_bitmap starts equal to bcs bitmap */ + s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); + if (!s->access_bitmap) { + return -EINVAL; + } + bdrv_disable_dirty_bitmap(s->access_bitmap); + 
bdrv_dirty_bitmap_merge_internal(s->access_bitmap, + block_copy_dirty_bitmap(s->bcs), NULL, + true); + + qemu_co_mutex_init(&s->lock); + QLIST_INIT(&s->frozen_read_reqs); return 0; } @@ -187,6 +435,9 @@ static void cbw_close(BlockDriverState *bs) { BDRVCopyBeforeWriteState *s = bs->opaque; + bdrv_release_dirty_bitmap(s->access_bitmap); + bdrv_release_dirty_bitmap(s->done_bitmap); + block_copy_state_free(s->bcs); s->bcs = NULL; } @@ -204,6 +455,10 @@ BlockDriver bdrv_cbw_filter = { .bdrv_co_pdiscard = cbw_co_pdiscard, .bdrv_co_flush = cbw_co_flush, + .bdrv_co_preadv_snapshot = cbw_co_preadv_snapshot, + .bdrv_co_pdiscard_snapshot = cbw_co_pdiscard_snapshot, + .bdrv_co_snapshot_block_status = cbw_co_snapshot_block_status, + .bdrv_refresh_filename = cbw_refresh_filename, .bdrv_child_perm = cbw_child_perm, @@ -223,6 +478,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, QDict *opts; assert(source->total_sectors == target->total_sectors); + GLOBAL_STATE_CODE(); opts = qdict_new(); qdict_put_str(opts, "driver", "copy-before-write"); @@ -245,6 +501,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, void bdrv_cbw_drop(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); bdrv_drop_filter(bs, &error_abort); bdrv_unref(bs); } diff --git a/block/copy-before-write.h b/block/copy-before-write.h index 51847e711a..6e72bb25e9 100644 --- a/block/copy-before-write.h +++ b/block/copy-before-write.h @@ -29,6 +29,13 @@ #include "block/block_int.h" #include "block/block-copy.h" +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. 
+ */ + BlockDriverState *bdrv_cbw_append(BlockDriverState *source, BlockDriverState *target, const char *filter_node_name, diff --git a/block/coroutines.h b/block/coroutines.h index c8c14a29c8..b293e943c8 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -30,17 +30,17 @@ /* For blk_bs() in generated block/block-gen.c */ #include "sysemu/block-backend.h" +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + int coroutine_fn bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp); -int generated_co_wrapper -bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes, - QEMUIOVector *qiov, BdrvRequestFlags flags); -int generated_co_wrapper -bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, - QEMUIOVector *qiov, BdrvRequestFlags flags); - int coroutine_fn bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, @@ -52,6 +52,51 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, int64_t *map, BlockDriverState **file, int *depth); + +int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos); +int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos); + +int coroutine_fn +nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp); + + +int coroutine_fn +blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); + + +int coroutine_fn +blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); + +int coroutine_fn +blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf); + +int coroutine_fn +blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); + +int coroutine_fn 
blk_co_do_flush(BlockBackend *blk); + + +/* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. + * + * See include/block/block-io.h for more information about + * the "I/O or GS" API. + */ + +int generated_co_wrapper +bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); + +int generated_co_wrapper +bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); + int generated_co_wrapper bdrv_common_block_status_above(BlockDriverState *bs, BlockDriverState *base, @@ -63,46 +108,24 @@ bdrv_common_block_status_above(BlockDriverState *bs, int64_t *map, BlockDriverState **file, int *depth); - -int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs, - QEMUIOVector *qiov, int64_t pos); -int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs, - QEMUIOVector *qiov, int64_t pos); - int generated_co_wrapper nbd_do_establish_connection(BlockDriverState *bs, Error **errp); -int coroutine_fn -nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp); - int generated_co_wrapper blk_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); -int coroutine_fn -blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, - QEMUIOVector *qiov, BdrvRequestFlags flags); - int generated_co_wrapper blk_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); -int coroutine_fn -blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, - BdrvRequestFlags flags); int generated_co_wrapper blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf); -int coroutine_fn -blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf); int generated_co_wrapper blk_do_pdiscard(BlockBackend *blk, int64_t 
offset, int64_t bytes); -int coroutine_fn -blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); int generated_co_wrapper blk_do_flush(BlockBackend *blk); -int coroutine_fn blk_co_do_flush(BlockBackend *blk); #endif /* BLOCK_COROUTINES_INT_H */ diff --git a/block/create.c b/block/create.c index 89812669df..4df43f11f4 100644 --- a/block/create.c +++ b/block/create.c @@ -42,6 +42,8 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp) BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); int ret; + GLOBAL_STATE_CODE(); + job_progress_set_remaining(&s->common, 1); ret = s->drv->bdrv_co_create(s->opts, errp); job_progress_update(&s->common, 1); diff --git a/block/crypto.c b/block/crypto.c index c8ba4681e2..1ba82984ef 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -31,6 +31,7 @@ #include "qemu/module.h" #include "qemu/option.h" #include "qemu/cutils.h" +#include "qemu/memalign.h" #include "crypto.h" typedef struct BlockCrypto BlockCrypto; @@ -778,36 +779,54 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, Error **errp) } static int +block_crypto_amend_prepare(BlockDriverState *bs, Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + int ret; + + /* apply for exclusive read/write permissions to the underlying file */ + crypto->updating_keys = true; + ret = bdrv_child_refresh_perms(bs, bs->file, errp); + if (ret < 0) { + /* Well, in this case we will not be updating any keys */ + crypto->updating_keys = false; + } + return ret; +} + +static void +block_crypto_amend_cleanup(BlockDriverState *bs) +{ + BlockCrypto *crypto = bs->opaque; + Error *errp = NULL; + + /* release exclusive read/write permissions to the underlying file */ + crypto->updating_keys = false; + bdrv_child_refresh_perms(bs, bs->file, &errp); + + if (errp) { + error_report_err(errp); + } +} + +static int block_crypto_amend_options_generic_luks(BlockDriverState *bs, QCryptoBlockAmendOptions *amend_options, bool force, Error **errp) { 
BlockCrypto *crypto = bs->opaque; - int ret; assert(crypto); assert(crypto->block); - /* apply for exclusive read/write permissions to the underlying file*/ - crypto->updating_keys = true; - ret = bdrv_child_refresh_perms(bs, bs->file, errp); - if (ret) { - goto cleanup; - } - - ret = qcrypto_block_amend_options(crypto->block, - block_crypto_read_func, - block_crypto_write_func, - bs, - amend_options, - force, - errp); -cleanup: - /* release exclusive read/write permissions to the underlying file*/ - crypto->updating_keys = false; - bdrv_child_refresh_perms(bs, bs->file, errp); - return ret; + return qcrypto_block_amend_options(crypto->block, + block_crypto_read_func, + block_crypto_write_func, + bs, + amend_options, + force, + errp); } static int @@ -833,8 +852,16 @@ block_crypto_amend_options_luks(BlockDriverState *bs, if (!amend_options) { goto cleanup; } + + ret = block_crypto_amend_prepare(bs, errp); + if (ret) { + goto perm_cleanup; + } ret = block_crypto_amend_options_generic_luks(bs, amend_options, force, errp); + +perm_cleanup: + block_crypto_amend_cleanup(bs); cleanup: qapi_free_QCryptoBlockAmendOptions(amend_options); return ret; @@ -931,6 +958,8 @@ static BlockDriver bdrv_crypto_luks = { .bdrv_get_specific_info = block_crypto_get_specific_info_luks, .bdrv_amend_options = block_crypto_amend_options_luks, .bdrv_co_amend = block_crypto_co_amend_luks, + .bdrv_amend_pre_run = block_crypto_amend_prepare, + .bdrv_amend_clean = block_crypto_amend_cleanup, .is_format = true, diff --git a/block/curl.c b/block/curl.c index 6a6cd72975..1e0f609579 100644 --- a/block/curl.c +++ b/block/curl.c @@ -458,38 +458,51 @@ static int curl_init_state(BDRVCURLState *s, CURLState *state) if (!state->curl) { return -EIO; } - curl_easy_setopt(state->curl, CURLOPT_URL, s->url); - curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER, - (long) s->sslverify); - curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYHOST, - s->sslverify ? 
2L : 0L); + if (curl_easy_setopt(state->curl, CURLOPT_URL, s->url) || + curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER, + (long) s->sslverify) || + curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYHOST, + s->sslverify ? 2L : 0L)) { + goto err; + } if (s->cookie) { - curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie); + if (curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie)) { + goto err; + } + } + if (curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout) || + curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, + (void *)curl_read_cb) || + curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state) || + curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state) || + curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1) || + curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1) || + curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1) || + curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg) || + curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1)) { + goto err; } - curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout); - curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, - (void *)curl_read_cb); - curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state); - curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state); - curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1); - curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1); - curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1); - curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg); - curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1); - if (s->username) { - curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username); + if (curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username)) { + goto err; + } } if (s->password) { - curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password); + if (curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password)) { + goto err; + } } if (s->proxyusername) { - 
curl_easy_setopt(state->curl, - CURLOPT_PROXYUSERNAME, s->proxyusername); + if (curl_easy_setopt(state->curl, + CURLOPT_PROXYUSERNAME, s->proxyusername)) { + goto err; + } } if (s->proxypassword) { - curl_easy_setopt(state->curl, - CURLOPT_PROXYPASSWORD, s->proxypassword); + if (curl_easy_setopt(state->curl, + CURLOPT_PROXYPASSWORD, s->proxypassword)) { + goto err; + } } /* Restrict supported protocols to avoid security issues in the more @@ -499,18 +512,27 @@ static int curl_init_state(BDRVCURLState *s, CURLState *state) * Restricting protocols is only supported from 7.19.4 upwards. */ #if LIBCURL_VERSION_NUM >= 0x071304 - curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS); - curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS); + if (curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS) || + curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS)) { + goto err; + } #endif #ifdef DEBUG_VERBOSE - curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1); + if (curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1)) { + goto err; + } #endif } state->s = s; return 0; + +err: + curl_easy_cleanup(state->curl); + state->curl = NULL; + return -EIO; } /* Called with s->mutex held. 
*/ @@ -759,14 +781,19 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, // Get file size if (curl_init_state(s, state) < 0) { + pstrcpy(state->errmsg, CURL_ERROR_SIZE, + "curl library initialization failed."); goto out; } s->accept_range = false; - curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1); - curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION, - curl_header_cb); - curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s); + if (curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1) || + curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION, curl_header_cb) || + curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s)) { + pstrcpy(state->errmsg, CURL_ERROR_SIZE, + "curl library initialization failed."); + goto out; + } if (curl_easy_perform(state->curl)) goto out; if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d)) { @@ -879,9 +906,8 @@ static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb) snprintf(state->range, 127, "%" PRIu64 "-%" PRIu64, start, end); trace_curl_setup_preadv(acb->bytes, start, state->range); - curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range); - - if (curl_multi_add_handle(s->multi, state->curl) != CURLM_OK) { + if (curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range) || + curl_multi_add_handle(s->multi, state->curl) != CURLM_OK) { state->acb[0] = NULL; acb->ret = -EIO; diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 0ef46163e3..da1b91166f 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -496,6 +496,7 @@ static void coroutine_fn bdrv_co_can_store_new_dirty_bitmap_entry(void *opaque) bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, uint32_t granularity, Error **errp) { + IO_CODE(); if (qemu_in_coroutine()) { return bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp); } else { @@ -656,6 +657,7 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap 
**out) { + IO_CODE(); assert(!bdrv_dirty_bitmap_readonly(bitmap)); bdrv_dirty_bitmaps_lock(bitmap->bs); if (!out) { @@ -673,6 +675,7 @@ void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup) { HBitmap *tmp = bitmap->bitmap; assert(!bdrv_dirty_bitmap_readonly(bitmap)); + GLOBAL_STATE_CODE(); bitmap->bitmap = backup; hbitmap_free(tmp); } @@ -737,6 +740,7 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes) { BdrvDirtyBitmap *bitmap; + IO_CODE(); if (QLIST_EMPTY(&bs->dirty_bitmaps)) { return; @@ -875,16 +879,25 @@ bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap, dirty_start, dirty_count); } +bool bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap, int64_t offset, + int64_t bytes, int64_t *count) +{ + return hbitmap_status(bitmap->bitmap, offset, bytes, count); +} + /** * bdrv_merge_dirty_bitmap: merge src into dest. * Ensures permissions on bitmaps are reasonable; use for public API. * * @backup: If provided, make a copy of dest here prior to merge. + * + * Returns true on success, false on failure. In case of failure bitmaps are + * untouched. 
*/ -void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src, +bool bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src, HBitmap **backup, Error **errp) { - bool ret; + bool ret = false; bdrv_dirty_bitmaps_lock(dest->bs); if (src->bs != dest->bs) { @@ -912,6 +925,8 @@ out: if (src->bs != dest->bs) { bdrv_dirty_bitmaps_unlock(src->bs); } + + return ret; } /** @@ -928,6 +943,7 @@ bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest, bool lock) { bool ret; + IO_CODE(); assert(!bdrv_dirty_bitmap_readonly(dest)); assert(!bdrv_dirty_bitmap_inconsistent(dest)); diff --git a/block/dmg.c b/block/dmg.c index 447901fbb8..c626587f9c 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -27,6 +27,7 @@ #include "qemu/bswap.h" #include "qemu/error-report.h" #include "qemu/module.h" +#include "qemu/memalign.h" #include "dmg.h" int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in, diff --git a/block/export/export.c b/block/export/export.c index 6d3b9964c8..7253af3bc3 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -139,7 +139,7 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) * access since the export could be available before migration handover. * ctx was acquired in the caller. 
*/ - bdrv_invalidate_cache(bs, NULL); + bdrv_activate(bs, NULL); perm = BLK_PERM_CONSISTENT_READ; if (export->writable) { diff --git a/block/export/fuse.c b/block/export/fuse.c index fdda8e3c81..e80b24a867 100644 --- a/block/export/fuse.c +++ b/block/export/fuse.c @@ -19,6 +19,7 @@ #define FUSE_USE_VERSION 31 #include "qemu/osdep.h" +#include "qemu/memalign.h" #include "block/aio.h" #include "block/block.h" #include "block/export.h" @@ -86,8 +87,8 @@ static int fuse_export_create(BlockExport *blk_exp, assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE); - /* For growable exports, take the RESIZE permission */ - if (args->growable) { + /* For growable and writable exports, take the RESIZE permission */ + if (args->growable || blk_exp_args->writable) { uint64_t blk_perm, blk_shared_perm; blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm); @@ -392,14 +393,23 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size, { uint64_t blk_perm, blk_shared_perm; BdrvRequestFlags truncate_flags = 0; - int ret; + bool add_resize_perm; + int ret, ret_check; + + /* Growable and writable exports have a permanent RESIZE permission */ + add_resize_perm = !exp->growable && !exp->writable; if (req_zero_write) { truncate_flags |= BDRV_REQ_ZERO_WRITE; } - /* Growable exports have a permanent RESIZE permission */ - if (!exp->growable) { + if (add_resize_perm) { + + if (!qemu_in_main_thread()) { + /* Changing permissions like below only works in the main thread */ + return -EPERM; + } + blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm); ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE, @@ -412,9 +422,11 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size, ret = blk_truncate(exp->common.blk, size, true, prealloc, truncate_flags, NULL); - if (!exp->growable) { + if (add_resize_perm) { /* Must succeed, because we are only giving up the RESIZE permission */ - blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort); + 
ret_check = blk_set_perm(exp->common.blk, blk_perm, + blk_shared_perm, &error_abort); + assert(ret_check == 0); } return ret; diff --git a/block/file-posix.c b/block/file-posix.c index 1f1756e192..c000a61db2 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -31,6 +31,7 @@ #include "qemu/module.h" #include "qemu/option.h" #include "qemu/units.h" +#include "qemu/memalign.h" #include "trace.h" #include "block/thread-pool.h" #include "qemu/iov.h" diff --git a/block/io.c b/block/io.c index 4e4cb556c5..3280144a17 100644 --- a/block/io.c +++ b/block/io.c @@ -32,6 +32,7 @@ #include "block/coroutines.h" #include "block/write-threshold.h" #include "qemu/cutils.h" +#include "qemu/memalign.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" @@ -70,6 +71,7 @@ static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c, void bdrv_parent_drained_end_single(BdrvChild *c) { int drained_end_counter = 0; + IO_OR_GS_CODE(); bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter); BDRV_POLL_WHILE(c->bs, qatomic_read(&drained_end_counter) > 0); } @@ -114,6 +116,7 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) { + IO_OR_GS_CODE(); c->parent_quiesce_counter++; if (c->klass->drained_begin) { c->klass->drained_begin(c); @@ -164,6 +167,8 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) BdrvChild *c; bool have_limits; + GLOBAL_STATE_CODE(); + if (tran) { BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1); *s = (BdrvRefreshLimitsState) { @@ -189,10 +194,6 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) QLIST_FOREACH(c, &bs->children, next) { if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW)) { - bdrv_refresh_limits(c->bs, tran, errp); - if (*errp) { - return; - } bdrv_merge_limits(&bs->bl, &c->bs->bl); have_limits = true; } @@ -226,12 +227,14 @@ 
void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) */ void bdrv_enable_copy_on_read(BlockDriverState *bs) { + IO_CODE(); qatomic_inc(&bs->copy_on_read); } void bdrv_disable_copy_on_read(BlockDriverState *bs) { int old = qatomic_fetch_dec(&bs->copy_on_read); + IO_CODE(); assert(old >= 1); } @@ -303,6 +306,7 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, BdrvChild *ignore_parent, bool ignore_bds_parents) { BdrvChild *child, *next; + IO_OR_GS_CODE(); if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { return true; @@ -426,6 +430,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent, bool ignore_bds_parents) { + IO_OR_GS_CODE(); assert(!qemu_in_coroutine()); /* Stop things in parent-to-child order */ @@ -477,11 +482,13 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, void bdrv_drained_begin(BlockDriverState *bs) { + IO_OR_GS_CODE(); bdrv_do_drained_begin(bs, false, NULL, false, true); } void bdrv_subtree_drained_begin(BlockDriverState *bs) { + IO_OR_GS_CODE(); bdrv_do_drained_begin(bs, true, NULL, false, true); } @@ -538,18 +545,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, void bdrv_drained_end(BlockDriverState *bs) { int drained_end_counter = 0; + IO_OR_GS_CODE(); bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter); BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); } void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter) { + IO_CODE(); bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter); } void bdrv_subtree_drained_end(BlockDriverState *bs) { int drained_end_counter = 0; + IO_OR_GS_CODE(); bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter); BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); } @@ -557,6 +567,7 @@ void bdrv_subtree_drained_end(BlockDriverState *bs) void 
bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) { int i; + IO_OR_GS_CODE(); for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { bdrv_do_drained_begin(child->bs, true, child, false, true); @@ -567,6 +578,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) { int drained_end_counter = 0; int i; + IO_OR_GS_CODE(); for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { bdrv_do_drained_end(child->bs, true, child, false, @@ -585,6 +597,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) */ void coroutine_fn bdrv_co_drain(BlockDriverState *bs) { + IO_OR_GS_CODE(); assert(qemu_in_coroutine()); bdrv_drained_begin(bs); bdrv_drained_end(bs); @@ -592,6 +605,7 @@ void coroutine_fn bdrv_co_drain(BlockDriverState *bs) void bdrv_drain(BlockDriverState *bs) { + IO_OR_GS_CODE(); bdrv_drained_begin(bs); bdrv_drained_end(bs); } @@ -612,6 +626,7 @@ static bool bdrv_drain_all_poll(void) { BlockDriverState *bs = NULL; bool result = false; + GLOBAL_STATE_CODE(); /* bdrv_drain_poll() can't make changes to the graph and we are holding the * main AioContext lock, so iterating bdrv_next_all_states() is safe. 
*/ @@ -640,6 +655,7 @@ static bool bdrv_drain_all_poll(void) void bdrv_drain_all_begin(void) { BlockDriverState *bs = NULL; + GLOBAL_STATE_CODE(); if (qemu_in_coroutine()) { bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL); @@ -682,6 +698,7 @@ void bdrv_drain_all_begin(void) void bdrv_drain_all_end_quiesce(BlockDriverState *bs) { int drained_end_counter = 0; + GLOBAL_STATE_CODE(); g_assert(bs->quiesce_counter > 0); g_assert(!bs->refcnt); @@ -696,6 +713,7 @@ void bdrv_drain_all_end(void) { BlockDriverState *bs = NULL; int drained_end_counter = 0; + GLOBAL_STATE_CODE(); /* * bdrv queue is managed by record/replay, @@ -723,6 +741,7 @@ void bdrv_drain_all_end(void) void bdrv_drain_all(void) { + GLOBAL_STATE_CODE(); bdrv_drain_all_begin(); bdrv_drain_all_end(); } @@ -867,6 +886,7 @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs) { BdrvTrackedRequest *req; Coroutine *self = qemu_coroutine_self(); + IO_CODE(); QLIST_FOREACH(req, &bs->tracked_requests, list) { if (req->co == self) { @@ -886,7 +906,7 @@ void bdrv_round_to_clusters(BlockDriverState *bs, int64_t *cluster_bytes) { BlockDriverInfo bdi; - + IO_CODE(); if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { *cluster_offset = offset; *cluster_bytes = bytes; @@ -912,16 +932,19 @@ static int bdrv_get_cluster_size(BlockDriverState *bs) void bdrv_inc_in_flight(BlockDriverState *bs) { + IO_CODE(); qatomic_inc(&bs->in_flight); } void bdrv_wakeup(BlockDriverState *bs) { + IO_CODE(); aio_wait_kick(); } void bdrv_dec_in_flight(BlockDriverState *bs) { + IO_CODE(); qatomic_dec(&bs->in_flight); bdrv_wakeup(bs); } @@ -946,6 +969,7 @@ bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req, uint64_t align) { bool waited; + IO_CODE(); qemu_co_mutex_lock(&req->bs->reqs_lock); @@ -1040,6 +1064,7 @@ static int bdrv_check_request32(int64_t offset, int64_t bytes, int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes, BdrvRequestFlags flags) { + 
IO_CODE(); return bdrv_pwritev(child, offset, bytes, NULL, BDRV_REQ_ZERO_WRITE | flags); } @@ -1058,6 +1083,7 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) int ret; int64_t target_size, bytes, offset = 0; BlockDriverState *bs = child->bs; + IO_CODE(); target_size = bdrv_getlength(bs); if (target_size < 0) { @@ -1090,6 +1116,7 @@ int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes) { int ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_CODE(); if (bytes < 0) { return -EINVAL; @@ -1111,6 +1138,7 @@ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, { int ret; QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_CODE(); if (bytes < 0) { return -EINVAL; @@ -1131,6 +1159,7 @@ int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, const void *buf, int64_t count) { int ret; + IO_CODE(); ret = bdrv_pwrite(child, offset, buf, count); if (ret < 0) { @@ -1797,6 +1826,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { + IO_CODE(); return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags); } @@ -1809,6 +1839,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, BdrvTrackedRequest req; BdrvRequestPadding pad; int ret; + IO_CODE(); trace_bdrv_co_preadv_part(bs, offset, bytes, flags); @@ -2173,6 +2204,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, padding = bdrv_init_padding(bs, offset, bytes, &pad); if (padding) { + assert(!(flags & BDRV_REQ_NO_WAIT)); bdrv_make_request_serialising(req, align); bdrv_padding_rmw_read(child, req, &pad, true); @@ -2230,6 +2262,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { + IO_CODE(); return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags); } @@ -2243,6 +2276,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, BdrvRequestPadding pad; int ret; 
bool padded = false; + IO_CODE(); trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags); @@ -2307,6 +2341,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, * serialize the request to prevent interactions of the * widened region with other transactions. */ + assert(!(flags & BDRV_REQ_NO_WAIT)); bdrv_make_request_serialising(&req, align); bdrv_padding_rmw_read(child, &req, &pad, false); } @@ -2326,6 +2361,7 @@ out: int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes, BdrvRequestFlags flags) { + IO_CODE(); trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags); if (!(child->bs->open_flags & BDRV_O_UNMAP)) { @@ -2345,6 +2381,8 @@ int bdrv_flush_all(void) BlockDriverState *bs = NULL; int result = 0; + GLOBAL_STATE_CODE(); + /* * bdrv queue is managed by record/replay, * creating new flush request for stopping @@ -2639,6 +2677,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState *p; int64_t eof = 0; int dummy; + IO_CODE(); assert(!include_base || base); /* Can't include NULL base */ @@ -2728,6 +2767,7 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { + IO_CODE(); return bdrv_common_block_status_above(bs, base, false, true, offset, bytes, pnum, map, file, NULL); } @@ -2735,6 +2775,7 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file) { + IO_CODE(); return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs), offset, bytes, pnum, map, file); } @@ -2751,6 +2792,7 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, { int ret; int64_t pnum = bytes; + IO_CODE(); if (!bytes) { return 1; @@ -2771,6 +2813,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset, { int ret; int64_t dummy; 
+ IO_CODE(); ret = bdrv_common_block_status_above(bs, bs, true, false, offset, bytes, pnum ? pnum : &dummy, NULL, @@ -2807,6 +2850,7 @@ int bdrv_is_allocated_above(BlockDriverState *top, int ret = bdrv_common_block_status_above(top, base, include_base, false, offset, bytes, pnum, NULL, NULL, &depth); + IO_CODE(); if (ret < 0) { return ret; } @@ -2823,6 +2867,7 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) BlockDriver *drv = bs->drv; BlockDriverState *child_bs = bdrv_primary_bs(bs); int ret; + IO_CODE(); ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); if (ret < 0) { @@ -2854,6 +2899,7 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) BlockDriver *drv = bs->drv; BlockDriverState *child_bs = bdrv_primary_bs(bs); int ret; + IO_CODE(); ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); if (ret < 0) { @@ -2884,6 +2930,7 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); int ret = bdrv_writev_vmstate(bs, &qiov, pos); + IO_CODE(); return ret < 0 ? ret : size; } @@ -2893,6 +2940,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size); int ret = bdrv_readv_vmstate(bs, &qiov, pos); + IO_CODE(); return ret < 0 ? ret : size; } @@ -2902,6 +2950,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, void bdrv_aio_cancel(BlockAIOCB *acb) { + IO_CODE(); qemu_aio_ref(acb); bdrv_aio_cancel_async(acb); while (acb->refcnt > 1) { @@ -2926,6 +2975,7 @@ void bdrv_aio_cancel(BlockAIOCB *acb) * In either case the completion callback must be called. 
*/ void bdrv_aio_cancel_async(BlockAIOCB *acb) { + IO_CODE(); if (acb->aiocb_info->cancel_async) { acb->aiocb_info->cancel_async(acb); } @@ -2940,6 +2990,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) BdrvChild *child; int current_gen; int ret = 0; + IO_CODE(); bdrv_inc_in_flight(bs); @@ -3065,6 +3116,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t max_pdiscard; int head, tail, align; BlockDriverState *bs = child->bs; + IO_CODE(); if (!bs || !bs->drv || !bdrv_is_inserted(bs)) { return -ENOMEDIUM; @@ -3183,6 +3235,7 @@ int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf) .coroutine = qemu_coroutine_self(), }; BlockAIOCB *acb; + IO_CODE(); bdrv_inc_in_flight(bs); if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) { @@ -3207,17 +3260,20 @@ out: void *qemu_blockalign(BlockDriverState *bs, size_t size) { + IO_CODE(); return qemu_memalign(bdrv_opt_mem_align(bs), size); } void *qemu_blockalign0(BlockDriverState *bs, size_t size) { + IO_CODE(); return memset(qemu_blockalign(bs, size), 0, size); } void *qemu_try_blockalign(BlockDriverState *bs, size_t size) { size_t align = bdrv_opt_mem_align(bs); + IO_CODE(); /* Ensure that NULL is never returned on success */ assert(align > 0); @@ -3231,6 +3287,7 @@ void *qemu_try_blockalign(BlockDriverState *bs, size_t size) void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) { void *mem = qemu_try_blockalign(bs, size); + IO_CODE(); if (mem) { memset(mem, 0, size); @@ -3246,6 +3303,7 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) { int i; size_t alignment = bdrv_min_mem_align(bs); + IO_CODE(); for (i = 0; i < qiov->niov; i++) { if ((uintptr_t) qiov->iov[i].iov_base % alignment) { @@ -3262,6 +3320,7 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) void bdrv_io_plug(BlockDriverState *bs) { BdrvChild *child; + IO_CODE(); QLIST_FOREACH(child, &bs->children, next) { bdrv_io_plug(child->bs); @@ -3278,6 +3337,7 @@ void 
bdrv_io_plug(BlockDriverState *bs) void bdrv_io_unplug(BlockDriverState *bs) { BdrvChild *child; + IO_CODE(); assert(bs->io_plugged); if (qatomic_fetch_dec(&bs->io_plugged) == 1) { @@ -3296,6 +3356,7 @@ void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size) { BdrvChild *child; + GLOBAL_STATE_CODE(); if (bs->drv && bs->drv->bdrv_register_buf) { bs->drv->bdrv_register_buf(bs, host, size); } @@ -3308,6 +3369,7 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host) { BdrvChild *child; + GLOBAL_STATE_CODE(); if (bs->drv && bs->drv->bdrv_unregister_buf) { bs->drv->bdrv_unregister_buf(bs, host); } @@ -3328,6 +3390,8 @@ static int coroutine_fn bdrv_co_copy_range_internal( /* TODO We can support BDRV_REQ_NO_FALLBACK here */ assert(!(read_flags & BDRV_REQ_NO_FALLBACK)); assert(!(write_flags & BDRV_REQ_NO_FALLBACK)); + assert(!(read_flags & BDRV_REQ_NO_WAIT)); + assert(!(write_flags & BDRV_REQ_NO_WAIT)); if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) { return -ENOMEDIUM; @@ -3402,6 +3466,7 @@ int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { + IO_CODE(); trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes, read_flags, write_flags); return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset, @@ -3418,6 +3483,7 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { + IO_CODE(); trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, read_flags, write_flags); return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset, @@ -3429,6 +3495,7 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { + IO_CODE(); return bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes, read_flags, write_flags); @@ -3461,7 +3528,7 @@ int coroutine_fn 
bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, BdrvTrackedRequest req; int64_t old_size, new_bytes; int ret; - + IO_CODE(); /* if bs->drv == NULL, bs is closed, so there's nothing to do here */ if (!drv) { @@ -3579,6 +3646,7 @@ out: void bdrv_cancel_in_flight(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); if (!bs || !bs->drv) { return; } @@ -3587,3 +3655,75 @@ void bdrv_cancel_in_flight(BlockDriverState *bs) bs->drv->bdrv_cancel_in_flight(bs); } } + +int coroutine_fn +bdrv_co_preadv_snapshot(BdrvChild *child, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset) +{ + BlockDriverState *bs = child->bs; + BlockDriver *drv = bs->drv; + int ret; + IO_CODE(); + + if (!drv) { + return -ENOMEDIUM; + } + + if (!drv->bdrv_co_preadv_snapshot) { + return -ENOTSUP; + } + + bdrv_inc_in_flight(bs); + ret = drv->bdrv_co_preadv_snapshot(bs, offset, bytes, qiov, qiov_offset); + bdrv_dec_in_flight(bs); + + return ret; +} + +int coroutine_fn +bdrv_co_snapshot_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + BlockDriver *drv = bs->drv; + int ret; + IO_CODE(); + + if (!drv) { + return -ENOMEDIUM; + } + + if (!drv->bdrv_co_snapshot_block_status) { + return -ENOTSUP; + } + + bdrv_inc_in_flight(bs); + ret = drv->bdrv_co_snapshot_block_status(bs, want_zero, offset, bytes, + pnum, map, file); + bdrv_dec_in_flight(bs); + + return ret; +} + +int coroutine_fn +bdrv_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes) +{ + BlockDriver *drv = bs->drv; + int ret; + IO_CODE(); + + if (!drv) { + return -ENOMEDIUM; + } + + if (!drv->bdrv_co_pdiscard_snapshot) { + return -ENOTSUP; + } + + bdrv_inc_in_flight(bs); + ret = drv->bdrv_co_pdiscard_snapshot(bs, offset, bytes); + bdrv_dec_in_flight(bs); + + return ret; +} diff --git a/block/meson.build b/block/meson.build index 8a1ce58c9c..0b2a60c99b 100644 --- a/block/meson.build +++ b/block/meson.build @@ 
-32,7 +32,9 @@ block_ss.add(files( 'qcow2.c', 'quorum.c', 'raw-format.c', + 'reqlist.c', 'snapshot.c', + 'snapshot-access.c', 'throttle-groups.c', 'throttle.c', 'vhdx-endian.c', @@ -131,8 +133,11 @@ block_ss.add(module_block_h) wrapper_py = find_program('../scripts/block-coroutine-wrapper.py') block_gen_c = custom_target('block-gen.c', output: 'block-gen.c', - input: files('../include/block/block.h', - 'coroutines.h'), + input: files( + '../include/block/block-io.h', + '../include/block/block-global-state.h', + 'coroutines.h' + ), command: [wrapper_py, '@OUTPUT@', '@INPUT@']) block_ss.add(block_gen_c) diff --git a/block/mirror.c b/block/mirror.c index 69b2c1c697..d8ecb9efa2 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -23,6 +23,7 @@ #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" #include "qemu/bitmap.h" +#include "qemu/memalign.h" #define MAX_IN_FLIGHT 16 #define MAX_IO_BYTES (1 << 20) /* 1 Mb */ @@ -1864,6 +1865,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs, bool is_none_mode; BlockDriverState *base; + GLOBAL_STATE_CODE(); + if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) || (mode == MIRROR_SYNC_MODE_BITMAP)) { error_setg(errp, "Sync mode '%s' not supported", @@ -1889,6 +1892,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, bool base_read_only; BlockJob *job; + GLOBAL_STATE_CODE(); + base_read_only = bdrv_is_read_only(base); if (base_read_only) { diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c index 9f11deec64..8e35616c2e 100644 --- a/block/monitor/bitmap-qmp-cmds.c +++ b/block/monitor/bitmap-qmp-cmds.c @@ -56,6 +56,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, BlockDriverState *bs; BdrvDirtyBitmap *bitmap; + GLOBAL_STATE_CODE(); + if (!node) { error_setg(errp, "Node cannot be NULL"); return NULL; @@ -155,6 +157,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, BdrvDirtyBitmap *bitmap; AioContext *aio_context; + 
GLOBAL_STATE_CODE(); + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); if (!bitmap || !bs) { return NULL; @@ -259,7 +263,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, BlockDriverState *bs; BdrvDirtyBitmap *dst, *src, *anon; BlockDirtyBitmapMergeSourceList *lst; - Error *local_err = NULL; + + GLOBAL_STATE_CODE(); dst = block_dirty_bitmap_lookup(node, target, &bs, errp); if (!dst) { @@ -297,9 +302,7 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, abort(); } - bdrv_merge_dirty_bitmap(anon, src, NULL, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!bdrv_merge_dirty_bitmap(anon, src, NULL, errp)) { dst = NULL; goto out; } diff --git a/block/nbd.c b/block/nbd.c index 5853d85d60..146d25660e 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -313,6 +313,7 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, BDRVNBDState *s = (BDRVNBDState *)bs->opaque; int ret; bool blocking = nbd_client_connecting_wait(s); + IO_CODE(); assert(!s->ioc); diff --git a/block/nvme.c b/block/nvme.c index dd20de3865..552029931d 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -21,6 +21,7 @@ #include "qemu/module.h" #include "qemu/cutils.h" #include "qemu/option.h" +#include "qemu/memalign.h" #include "qemu/vfio-helpers.h" #include "block/block_int.h" #include "sysemu/replay.h" diff --git a/block/parallels-ext.c b/block/parallels-ext.c index e0dd0975c6..cb22a427d7 100644 --- a/block/parallels-ext.c +++ b/block/parallels-ext.c @@ -29,6 +29,7 @@ #include "parallels.h" #include "crypto/hash.h" #include "qemu/uuid.h" +#include "qemu/memalign.h" #define PARALLELS_FORMAT_EXTENSION_MAGIC 0xAB234CEF23DCEA87ULL diff --git a/block/parallels.c b/block/parallels.c index 6ebad2a2bb..cd23e02d06 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -41,6 +41,7 @@ #include "qapi/qapi-visit-block-core.h" #include "qemu/bswap.h" #include "qemu/bitmap.h" +#include "qemu/memalign.h" 
#include "migration/blocker.h" #include "parallels.h" @@ -873,7 +874,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->bat_dirty_bmap = bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block)); - /* Disable migration until bdrv_invalidate_cache method is added */ + /* Disable migration until bdrv_activate method is added */ error_setg(&s->migration_blocker, "The Parallels format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); diff --git a/block/preallocate.c b/block/preallocate.c index 1d4233f730..e15cb8c74a 100644 --- a/block/preallocate.c +++ b/block/preallocate.c @@ -276,6 +276,10 @@ static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset, int64_t end = offset + bytes; int64_t prealloc_start, prealloc_end; int ret; + uint32_t file_align = bs->file->bs->bl.request_alignment; + uint32_t prealloc_align = MAX(s->opts.prealloc_align, file_align); + + assert(QEMU_IS_ALIGNED(prealloc_align, file_align)); if (!has_prealloc_perms(bs)) { /* We don't have state neither should try to recover it */ @@ -320,9 +324,14 @@ static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset, /* Now we want new preallocation, as request writes beyond s->file_end. */ - prealloc_start = want_merge_zero ? MIN(offset, s->file_end) : s->file_end; - prealloc_end = QEMU_ALIGN_UP(end + s->opts.prealloc_size, - s->opts.prealloc_align); + prealloc_start = QEMU_ALIGN_UP( + want_merge_zero ? 
MIN(offset, s->file_end) : s->file_end, + file_align); + prealloc_end = QEMU_ALIGN_UP( + MAX(prealloc_start, end) + s->opts.prealloc_size, + prealloc_align); + + want_merge_zero = want_merge_zero && (prealloc_start <= offset); ret = bdrv_co_pwrite_zeroes( bs->file, prealloc_start, prealloc_end - prealloc_start, diff --git a/block/qcow.c b/block/qcow.c index c39940f33e..4fba1b9e36 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -32,6 +32,7 @@ #include "qemu/option.h" #include "qemu/bswap.h" #include "qemu/cutils.h" +#include "qemu/memalign.h" #include <zlib.h> #include "qapi/qmp/qdict.h" #include "qapi/qmp/qstring.h" diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 7444b9c4ab..8a0105911f 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -23,6 +23,7 @@ */ #include "qemu/osdep.h" +#include "qemu/memalign.h" #include "qcow2.h" #include "trace.h" diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 21884a1ab9..20a16ba6ee 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -28,6 +28,7 @@ #include "qapi/error.h" #include "qcow2.h" #include "qemu/bswap.h" +#include "qemu/memalign.h" #include "trace.h" int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 4614572252..94033972be 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -28,6 +28,7 @@ #include "qemu/range.h" #include "qemu/bswap.h" #include "qemu/cutils.h" +#include "qemu/memalign.h" #include "trace.h" static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size, diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 71ddb08c21..075269a023 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -29,6 +29,7 @@ #include "qemu/bswap.h" #include "qemu/error-report.h" #include "qemu/cutils.h" +#include "qemu/memalign.h" static void qcow2_free_single_snapshot(BlockDriverState *bs, int i) { diff --git a/block/qcow2.c b/block/qcow2.c index 
c8115e1cba..b5c47931ef 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -38,6 +38,7 @@ #include "qemu/option_int.h" #include "qemu/cutils.h" #include "qemu/bswap.h" +#include "qemu/memalign.h" #include "qapi/qobject-input-visitor.h" #include "qapi/qapi-visit-block-core.h" #include "crypto.h" diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c index b548362398..caf2c024c2 100644 --- a/block/qed-l2-cache.c +++ b/block/qed-l2-cache.c @@ -51,6 +51,7 @@ */ #include "qemu/osdep.h" +#include "qemu/memalign.h" #include "trace.h" #include "qed.h" diff --git a/block/qed-table.c b/block/qed-table.c index 405d446cbe..1cc844b1a5 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -17,6 +17,7 @@ #include "qemu/sockets.h" /* for EINPROGRESS on Windows */ #include "qed.h" #include "qemu/bswap.h" +#include "qemu/memalign.h" /* Called with table_lock held. */ static int coroutine_fn qed_read_table(BDRVQEDState *s, uint64_t offset, diff --git a/block/qed.c b/block/qed.c index 558d3646c4..f34d9a3ac1 100644 --- a/block/qed.c +++ b/block/qed.c @@ -20,6 +20,7 @@ #include "qemu/main-loop.h" #include "qemu/module.h" #include "qemu/option.h" +#include "qemu/memalign.h" #include "trace.h" #include "qed.h" #include "sysemu/block-backend.h" diff --git a/block/quorum.c b/block/quorum.c index c28dda7baa..f33f30d36b 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -17,6 +17,7 @@ #include "qemu/cutils.h" #include "qemu/module.h" #include "qemu/option.h" +#include "qemu/memalign.h" #include "block/block_int.h" #include "block/coroutines.h" #include "block/qdict.h" diff --git a/block/raw-format.c b/block/raw-format.c index bda757fd19..69fd650eaf 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -31,6 +31,7 @@ #include "qapi/error.h" #include "qemu/module.h" #include "qemu/option.h" +#include "qemu/memalign.h" typedef struct BDRVRawState { uint64_t offset; diff --git a/block/reqlist.c b/block/reqlist.c new file mode 100644 index 0000000000..08cb57cfa4 --- /dev/null +++ 
b/block/reqlist.c @@ -0,0 +1,85 @@ +/* + * reqlist API + * + * Copyright (C) 2013 Proxmox Server Solutions + * Copyright (c) 2021 Virtuozzo International GmbH. + * + * Authors: + * Dietmar Maurer (dietmar@proxmox.com) + * Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/range.h" + +#include "block/reqlist.h" + +void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset, + int64_t bytes) +{ + assert(!reqlist_find_conflict(reqs, offset, bytes)); + + *req = (BlockReq) { + .offset = offset, + .bytes = bytes, + }; + qemu_co_queue_init(&req->wait_queue); + QLIST_INSERT_HEAD(reqs, req, list); +} + +BlockReq *reqlist_find_conflict(BlockReqList *reqs, int64_t offset, + int64_t bytes) +{ + BlockReq *r; + + QLIST_FOREACH(r, reqs, list) { + if (ranges_overlap(offset, bytes, r->offset, r->bytes)) { + return r; + } + } + + return NULL; +} + +bool coroutine_fn reqlist_wait_one(BlockReqList *reqs, int64_t offset, + int64_t bytes, CoMutex *lock) +{ + BlockReq *r = reqlist_find_conflict(reqs, offset, bytes); + + if (!r) { + return false; + } + + qemu_co_queue_wait(&r->wait_queue, lock); + + return true; +} + +void coroutine_fn reqlist_wait_all(BlockReqList *reqs, int64_t offset, + int64_t bytes, CoMutex *lock) +{ + while (reqlist_wait_one(reqs, offset, bytes, lock)) { + /* continue */ + } +} + +void coroutine_fn reqlist_shrink_req(BlockReq *req, int64_t new_bytes) +{ + if (new_bytes == req->bytes) { + return; + } + + assert(new_bytes > 0 && new_bytes < req->bytes); + + req->bytes = new_bytes; + qemu_co_queue_restart_all(&req->wait_queue); +} + +void coroutine_fn reqlist_remove_req(BlockReq *req) +{ + QLIST_REMOVE(req, list); + qemu_co_queue_restart_all(&req->wait_queue); +} diff --git a/block/snapshot-access.c b/block/snapshot-access.c new file mode 100644 index 
0000000000..77b87c1946 --- /dev/null +++ b/block/snapshot-access.c @@ -0,0 +1,132 @@ +/* + * snapshot_access block driver + * + * Copyright (c) 2022 Virtuozzo International GmbH. + * + * Author: + * Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" + +#include "sysemu/block-backend.h" +#include "qemu/cutils.h" +#include "block/block_int.h" + +static coroutine_fn int +snapshot_access_co_preadv_part(BlockDriverState *bs, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ + if (flags) { + return -ENOTSUP; + } + + return bdrv_co_preadv_snapshot(bs->file, offset, bytes, qiov, qiov_offset); +} + +static int coroutine_fn +snapshot_access_co_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, + int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file) +{ + return bdrv_co_snapshot_block_status(bs->file->bs, want_zero, offset, + bytes, pnum, map, file); +} + +static int coroutine_fn snapshot_access_co_pdiscard(BlockDriverState *bs, + int64_t offset, int64_t bytes) +{ + return bdrv_co_pdiscard_snapshot(bs->file->bs, offset, bytes); +} + +static int coroutine_fn +snapshot_access_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int64_t bytes, + BdrvRequestFlags flags) +{ + return -ENOTSUP; +} + +static 
coroutine_fn int +snapshot_access_co_pwritev_part(BlockDriverState *bs, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ + return -ENOTSUP; +} + + +static void snapshot_access_refresh_filename(BlockDriverState *bs) +{ + pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), + bs->file->bs->filename); +} + +static int snapshot_access_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, + false, errp); + if (!bs->file) { + return -EINVAL; + } + + bs->total_sectors = bs->file->bs->total_sectors; + + return 0; +} + +static void snapshot_access_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + /* + * Currently, we don't need any permissions. If bs->file provides + * snapshot-access API, we can use it. 
+ */ + *nperm = 0; + *nshared = BLK_PERM_ALL; +} + +BlockDriver bdrv_snapshot_access_drv = { + .format_name = "snapshot-access", + + .bdrv_open = snapshot_access_open, + + .bdrv_co_preadv_part = snapshot_access_co_preadv_part, + .bdrv_co_pwritev_part = snapshot_access_co_pwritev_part, + .bdrv_co_pwrite_zeroes = snapshot_access_co_pwrite_zeroes, + .bdrv_co_pdiscard = snapshot_access_co_pdiscard, + .bdrv_co_block_status = snapshot_access_co_block_status, + + .bdrv_refresh_filename = snapshot_access_refresh_filename, + + .bdrv_child_perm = snapshot_access_child_perm, +}; + +static void snapshot_access_init(void) +{ + bdrv_register(&bdrv_snapshot_access_drv); +} + +block_init(snapshot_access_init); diff --git a/block/snapshot.c b/block/snapshot.c index ccacda8bd5..d6f53c3065 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -57,6 +57,8 @@ int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info, QEMUSnapshotInfo *sn_tab, *sn; int nb_sns, i, ret; + GLOBAL_STATE_CODE(); + ret = -ENOENT; nb_sns = bdrv_snapshot_list(bs, &sn_tab); if (nb_sns < 0) { @@ -105,6 +107,7 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, bool ret = false; assert(id || name); + GLOBAL_STATE_CODE(); nb_sns = bdrv_snapshot_list(bs, &sn_tab); if (nb_sns < 0) { @@ -200,6 +203,7 @@ static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs) int bdrv_can_snapshot(BlockDriverState *bs) { BlockDriver *drv = bs->drv; + GLOBAL_STATE_CODE(); if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { return 0; } @@ -220,6 +224,9 @@ int bdrv_snapshot_create(BlockDriverState *bs, { BlockDriver *drv = bs->drv; BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); + + GLOBAL_STATE_CODE(); + if (!drv) { return -ENOMEDIUM; } @@ -240,6 +247,8 @@ int bdrv_snapshot_goto(BlockDriverState *bs, BdrvChild **fallback_ptr; int ret, open_ret; + GLOBAL_STATE_CODE(); + if (!drv) { error_setg(errp, "Block driver is closed"); return -ENOMEDIUM; @@ -348,6 +357,8 @@ int 
bdrv_snapshot_delete(BlockDriverState *bs, BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); int ret; + GLOBAL_STATE_CODE(); + if (!drv) { error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs)); return -ENOMEDIUM; @@ -380,6 +391,8 @@ int bdrv_snapshot_list(BlockDriverState *bs, { BlockDriver *drv = bs->drv; BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); + + GLOBAL_STATE_CODE(); if (!drv) { return -ENOMEDIUM; } @@ -419,6 +432,8 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, { BlockDriver *drv = bs->drv; + GLOBAL_STATE_CODE(); + if (!drv) { error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs)); return -ENOMEDIUM; @@ -447,6 +462,8 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, int ret; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + ret = bdrv_snapshot_load_tmp(bs, id_or_name, NULL, &local_err); if (ret == -ENOENT || ret == -EINVAL) { error_free(local_err); @@ -515,6 +532,8 @@ bool bdrv_all_can_snapshot(bool has_devices, strList *devices, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + GLOBAL_STATE_CODE(); + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return false; } @@ -549,6 +568,8 @@ int bdrv_all_delete_snapshot(const char *name, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + GLOBAL_STATE_CODE(); + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return -1; } @@ -588,6 +609,8 @@ int bdrv_all_goto_snapshot(const char *name, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + GLOBAL_STATE_CODE(); + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return -1; } @@ -622,6 +645,8 @@ int bdrv_all_has_snapshot(const char *name, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + GLOBAL_STATE_CODE(); + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return -1; } @@ -663,6 +688,7 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, { g_autoptr(GList) bdrvs = NULL; GList 
*iterbdrvs; + GLOBAL_STATE_CODE(); if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return -1; @@ -703,6 +729,8 @@ BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs, g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + GLOBAL_STATE_CODE(); + if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return NULL; } diff --git a/block/stream.c b/block/stream.c index 7c6b173ddd..3acb59fe6a 100644 --- a/block/stream.c +++ b/block/stream.c @@ -220,6 +220,8 @@ void stream_start(const char *job_id, BlockDriverState *bs, QDict *opts; int ret; + GLOBAL_STATE_CODE(); + assert(!(base && bottom)); assert(!(backing_file_str && bottom)); diff --git a/block/vdi.c b/block/vdi.c index bdc58d726e..cca3a3a356 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -64,6 +64,7 @@ #include "qemu/coroutine.h" #include "qemu/cutils.h" #include "qemu/uuid.h" +#include "qemu/memalign.h" /* Code configuration options. */ diff --git a/block/vhdx-log.c b/block/vhdx-log.c index 7672161d95..ff0d4e0da0 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -23,6 +23,7 @@ #include "block/block_int.h" #include "qemu/error-report.h" #include "qemu/bswap.h" +#include "qemu/memalign.h" #include "vhdx.h" diff --git a/block/vhdx.c b/block/vhdx.c index 356ec4c455..410c6f9610 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -25,6 +25,7 @@ #include "qemu/crc32c.h" #include "qemu/bswap.h" #include "qemu/error-report.h" +#include "qemu/memalign.h" #include "vhdx.h" #include "migration/blocker.h" #include "qemu/uuid.h" diff --git a/block/vmdk.c b/block/vmdk.c index 0dfab6e941..37c0946066 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -33,6 +33,7 @@ #include "qemu/module.h" #include "qemu/option.h" #include "qemu/bswap.h" +#include "qemu/memalign.h" #include "migration/blocker.h" #include "qemu/cutils.h" #include <zlib.h> diff --git a/block/vpc.c b/block/vpc.c index 297a26262a..4d8f16e199 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -33,6 +33,7 @@ #include 
"migration/blocker.h" #include "qemu/bswap.h" #include "qemu/uuid.h" +#include "qemu/memalign.h" #include "qapi/qmp/qdict.h" #include "qapi/qobject-input-visitor.h" #include "qapi/qapi-visit-block-core.h" diff --git a/block/win32-aio.c b/block/win32-aio.c index c57e10c997..aadc7b1bc3 100644 --- a/block/win32-aio.c +++ b/block/win32-aio.c @@ -29,6 +29,7 @@ #include "block/raw-aio.h" #include "qemu/event_notifier.h" #include "qemu/iov.h" +#include "qemu/memalign.h" #include <windows.h> #include <winioctl.h> diff --git a/blockdev.c b/blockdev.c index 42e098b458..e46e831212 100644 --- a/blockdev.c +++ b/blockdev.c @@ -63,11 +63,13 @@ #include "qemu/main-loop.h" #include "qemu/throttle-options.h" +/* Protected by BQL */ QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states = QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states); void bdrv_set_monitor_owned(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list); } @@ -111,6 +113,8 @@ void override_max_devs(BlockInterfaceType type, int max_devs) BlockBackend *blk; DriveInfo *dinfo; + GLOBAL_STATE_CODE(); + if (max_devs <= 0) { return; } @@ -140,6 +144,8 @@ void blockdev_mark_auto_del(BlockBackend *blk) DriveInfo *dinfo = blk_legacy_dinfo(blk); BlockJob *job; + GLOBAL_STATE_CODE(); + if (!dinfo) { return; } @@ -161,6 +167,7 @@ void blockdev_mark_auto_del(BlockBackend *blk) void blockdev_auto_del(BlockBackend *blk) { DriveInfo *dinfo = blk_legacy_dinfo(blk); + GLOBAL_STATE_CODE(); if (dinfo && dinfo->auto_del) { monitor_remove_blk(blk); @@ -185,6 +192,8 @@ QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file, { QemuOpts *opts; + GLOBAL_STATE_CODE(); + opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false); if (!opts) { return NULL; @@ -205,6 +214,8 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit) BlockBackend *blk; DriveInfo *dinfo; + GLOBAL_STATE_CODE(); + for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { dinfo = 
blk_legacy_dinfo(blk); if (dinfo && dinfo->type == type @@ -227,6 +238,8 @@ void drive_check_orphaned(void) Location loc; bool orphans = false; + GLOBAL_STATE_CODE(); + for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { dinfo = blk_legacy_dinfo(blk); /* @@ -260,6 +273,7 @@ void drive_check_orphaned(void) DriveInfo *drive_get_by_index(BlockInterfaceType type, int index) { + GLOBAL_STATE_CODE(); return drive_get(type, drive_index_to_bus_id(type, index), drive_index_to_unit_id(type, index)); @@ -271,6 +285,8 @@ int drive_get_max_bus(BlockInterfaceType type) BlockBackend *blk; DriveInfo *dinfo; + GLOBAL_STATE_CODE(); + max_bus = -1; for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { dinfo = blk_legacy_dinfo(blk); @@ -628,6 +644,7 @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) { int bdrv_flags = 0; + GLOBAL_STATE_CODE(); /* bdrv_open() defaults to the values in bdrv_flags (for compatibility * with other callers) rather than what we want as the real defaults. * Apply the defaults here instead. */ @@ -646,6 +663,7 @@ void blockdev_close_all_bdrv_states(void) { BlockDriverState *bs, *next_bs; + GLOBAL_STATE_CODE(); QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) { AioContext *ctx = bdrv_get_aio_context(bs); @@ -658,6 +676,7 @@ void blockdev_close_all_bdrv_states(void) /* Iterates over the list of monitor-owned BlockDriverStates */ BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); return bs ? QTAILQ_NEXT(bs, monitor_list) : QTAILQ_FIRST(&monitor_bdrv_states); } @@ -754,6 +773,8 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, const char *filename; int i; + GLOBAL_STATE_CODE(); + /* Change legacy command line options into QMP ones */ static const struct { const char *from; @@ -1174,6 +1195,8 @@ typedef struct BlkActionState BlkActionState; * * Only prepare() may fail. In a single transaction, only one of commit() or * abort() will be called. 
clean() will always be called if it is present. + * + * Always run under BQL. */ typedef struct BlkActionOps { size_t instance_size; @@ -2283,6 +2306,8 @@ static TransactionProperties *get_transaction_properties( /* * 'Atomic' group operations. The operations are performed as a set, and if * any fail then we roll back all operations in the group. + * + * Always run under BQL. */ void qmp_transaction(TransactionActionList *dev_list, bool has_props, @@ -2294,6 +2319,8 @@ void qmp_transaction(TransactionActionList *dev_list, BlkActionState *state, *next; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + QTAILQ_HEAD(, BlkActionState) snap_bdrv_states; QTAILQ_INIT(&snap_bdrv_states); @@ -3596,6 +3623,8 @@ void qmp_blockdev_del(const char *node_name, Error **errp) AioContext *aio_context; BlockDriverState *bs; + GLOBAL_STATE_CODE(); + bs = bdrv_find_node(node_name); if (!bs) { error_setg(errp, "Failed to find node with node-name='%s'", node_name); diff --git a/blockjob.c b/blockjob.c index 10815a89fe..4868453d74 100644 --- a/blockjob.c +++ b/blockjob.c @@ -62,6 +62,7 @@ static bool is_block_job(Job *job) BlockJob *block_job_next(BlockJob *bjob) { Job *job = bjob ? 
&bjob->job : NULL; + GLOBAL_STATE_CODE(); do { job = job_next(job); @@ -73,6 +74,7 @@ BlockJob *block_job_next(BlockJob *bjob) BlockJob *block_job_get(const char *id) { Job *job = job_get(id); + GLOBAL_STATE_CODE(); if (job && is_block_job(job)) { return container_of(job, BlockJob, job); @@ -84,6 +86,7 @@ BlockJob *block_job_get(const char *id) void block_job_free(Job *job) { BlockJob *bjob = container_of(job, BlockJob, job); + GLOBAL_STATE_CODE(); block_job_remove_all_bdrv(bjob); ratelimit_destroy(&bjob->limit); @@ -183,6 +186,7 @@ static const BdrvChildClass child_job = { void block_job_remove_all_bdrv(BlockJob *job) { + GLOBAL_STATE_CODE(); /* * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(), * which will also traverse job->nodes, so consume the list one by @@ -205,6 +209,7 @@ void block_job_remove_all_bdrv(BlockJob *job) bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) { GSList *el; + GLOBAL_STATE_CODE(); for (el = job->nodes; el; el = el->next) { BdrvChild *c = el->data; @@ -221,6 +226,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, { BdrvChild *c; bool need_context_ops; + GLOBAL_STATE_CODE(); bdrv_ref(bs); @@ -270,6 +276,8 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) const BlockJobDriver *drv = block_job_driver(job); int64_t old_speed = job->speed; + GLOBAL_STATE_CODE(); + if (job_apply_verb(&job->job, JOB_VERB_SET_SPEED, errp) < 0) { return false; } @@ -299,6 +307,7 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n) { + IO_CODE(); return ratelimit_calculate_delay(&job->limit, n); } @@ -307,6 +316,8 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp) BlockJobInfo *info; uint64_t progress_current, progress_total; + GLOBAL_STATE_CODE(); + if (block_job_is_internal(job)) { error_setg(errp, "Cannot query QEMU internal jobs"); return NULL; @@ -434,6 +445,7 @@ void 
*block_job_create(const char *job_id, const BlockJobDriver *driver, { BlockJob *job; int ret; + GLOBAL_STATE_CODE(); if (job_id == NULL && !(flags & JOB_INTERNAL)) { job_id = bdrv_get_device_name(bs); @@ -488,6 +500,7 @@ fail: void block_job_iostatus_reset(BlockJob *job) { + GLOBAL_STATE_CODE(); if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { return; } @@ -498,6 +511,7 @@ void block_job_iostatus_reset(BlockJob *job) void block_job_user_resume(Job *job) { BlockJob *bjob = container_of(job, BlockJob, job); + GLOBAL_STATE_CODE(); block_job_iostatus_reset(bjob); } @@ -505,6 +519,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err, int is_read, int error) { BlockErrorAction action; + IO_CODE(); switch (on_err) { case BLOCKDEV_ON_ERROR_ENOSPC: @@ -543,5 +558,6 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err, AioContext *block_job_get_aio_context(BlockJob *job) { + GLOBAL_STATE_CODE(); return job->job.aio_context; } @@ -630,10 +630,10 @@ case "$cpu" in ppc) CPU_CFLAGS="-m32" ;; ppc64) - CPU_CFLAGS="-m64 -mbig" ;; + CPU_CFLAGS="-m64 -mbig-endian" ;; ppc64le) cpu="ppc64" - CPU_CFLAGS="-m64 -mlittle" ;; + CPU_CFLAGS="-m64 -mlittle-endian" ;; s390) CPU_CFLAGS="-m31" ;; @@ -35,10 +35,12 @@ #include "sysemu/tcg.h" #include "sysemu/kvm.h" #include "sysemu/replay.h" +#include "exec/exec-all.h" #include "exec/translate-all.h" #include "exec/log.h" #include "hw/core/accel-cpu.h" #include "trace/trace-root.h" +#include "qemu/accel.h" uintptr_t qemu_host_page_size; intptr_t qemu_host_page_mask; @@ -415,11 +417,11 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...) 
/* physical memory access (slow version, mainly for debug) */ #if defined(CONFIG_USER_ONLY) -int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, - void *ptr, target_ulong len, bool is_write) +int cpu_memory_rw_debug(CPUState *cpu, vaddr addr, + void *ptr, size_t len, bool is_write) { int flags; - target_ulong l, page; + vaddr l, page; void * p; uint8_t *buf = ptr; diff --git a/docs/specs/index.rst b/docs/specs/index.rst index 32863b4aa5..e10684bf53 100644 --- a/docs/specs/index.rst +++ b/docs/specs/index.rst @@ -19,3 +19,4 @@ guest hardware that is specific to QEMU. acpi_pci_hotplug acpi_nvdimm acpi_erst + sev-guest-firmware diff --git a/docs/specs/sev-guest-firmware.rst b/docs/specs/sev-guest-firmware.rst new file mode 100644 index 0000000000..3f7f082df5 --- /dev/null +++ b/docs/specs/sev-guest-firmware.rst @@ -0,0 +1,125 @@ +==================================================== +QEMU/Guest Firmware Interface for AMD SEV and SEV-ES +==================================================== + +Overview +======== + +The guest firmware image (OVMF) may contain some configuration entries +which are used by QEMU before the guest launches. These are listed in a +GUIDed table at a known location in the firmware image. QEMU parses +this table when it loads the firmware image into memory, and then QEMU +reads individual entries when their values are needed. + +Though nothing in the table structure is SEV-specific, currently all the +entries in the table are related to SEV and SEV-ES features. + + +Table parsing in QEMU +--------------------- + +The table is parsed from the footer: first the presence of the table +footer GUID (96b582de-1fb2-45f7-baea-a366c55a082d) at 0xffffffd0 is +verified. If that is found, two bytes at 0xffffffce are the entire +table length. + +Then the table is scanned backwards looking for the specific entry GUID. 
+ +QEMU files related to parsing and scanning the OVMF table: + - ``hw/i386/pc_sysfw_ovmf.c`` + +The edk2 firmware code that constructs this structure is in the +`OVMF Reset Vector file`_. + + +Table memory layout +------------------- + ++------------+--------+-----------------------------------------+ +| GPA | Length | Description | ++============+========+=========================================+ +| 0xffffff80 | 4 | Zero padding | ++------------+--------+-----------------------------------------+ +| 0xffffff84 | 4 | SEV hashes table base address | ++------------+--------+-----------------------------------------+ +| 0xffffff88 | 4 | SEV hashes table size (=0x400) | ++------------+--------+-----------------------------------------+ +| 0xffffff8c | 2 | SEV hashes table entry length (=0x1a) | ++------------+--------+-----------------------------------------+ +| 0xffffff8e | 16 | SEV hashes table GUID: | +| | | 7255371f-3a3b-4b04-927b-1da6efa8d454 | ++------------+--------+-----------------------------------------+ +| 0xffffff9e | 4 | SEV secret block base address | ++------------+--------+-----------------------------------------+ +| 0xffffffa2 | 4 | SEV secret block size (=0xc00) | ++------------+--------+-----------------------------------------+ +| 0xffffffa6 | 2 | SEV secret block entry length (=0x1a) | ++------------+--------+-----------------------------------------+ +| 0xffffffa8 | 16 | SEV secret block GUID: | +| | | 4c2eb361-7d9b-4cc3-8081-127c90d3d294 | ++------------+--------+-----------------------------------------+ +| 0xffffffb8 | 4 | SEV-ES AP reset RIP | ++------------+--------+-----------------------------------------+ +| 0xffffffbc | 2 | SEV-ES reset block entry length (=0x16) | ++------------+--------+-----------------------------------------+ +| 0xffffffbe | 16 | SEV-ES reset block entry GUID: | +| | | 00f771de-1a7e-4fcb-890e-68c77e2fb44e | ++------------+--------+-----------------------------------------+ +| 0xffffffce | 2 | Length of entire 
table including table | +| | | footer GUID and length (=0x72) | ++------------+--------+-----------------------------------------+ +| 0xffffffd0 | 16 | OVMF GUIDed table footer GUID: | +| | | 96b582de-1fb2-45f7-baea-a366c55a082d | ++------------+--------+-----------------------------------------+ +| 0xffffffe0 | 8 | Application processor entry point code | ++------------+--------+-----------------------------------------+ +| 0xffffffe8 | 8 | "\0\0\0\0VTF\0" | ++------------+--------+-----------------------------------------+ +| 0xfffffff0 | 16 | Reset vector code | ++------------+--------+-----------------------------------------+ + + +Table entries description +========================= + +SEV-ES reset block +------------------ + +Entry GUID: 00f771de-1a7e-4fcb-890e-68c77e2fb44e + +For the initial boot of an AP under SEV-ES, the "reset" RIP must be +programmed to the RAM area defined by this entry. The entry's format +is: + +* IP value [0:15] +* CS segment base [31:16] + +A hypervisor reads the CS segment base and IP value. The CS segment +base value represents the high order 16-bits of the CS segment base, so +the hypervisor must left shift the value of the CS segment base by 16 +bits to form the full CS segment base for the CS segment register. It +would then program the EIP register with the IP value as read. + + +SEV secret block +---------------- + +Entry GUID: 4c2eb361-7d9b-4cc3-8081-127c90d3d294 + +This describes the guest RAM area where the hypervisor should inject the +Guest Owner secret (using SEV_LAUNCH_SECRET). + + +SEV hashes table +---------------- + +Entry GUID: 7255371f-3a3b-4b04-927b-1da6efa8d454 + +This describes the guest RAM area where the hypervisor should install a +table describing the hashes of certain firmware configuration device +files that would otherwise be passed in unchecked. The current use is +for the kernel, initrd and command line values, but others may be added. + + +.. 
_OVMF Reset Vector file: + https://github.com/tianocore/edk2/blob/master/OvmfPkg/ResetVector/Ia16/ResetVectorVtf0.asm diff --git a/docs/system/riscv/virt.rst b/docs/system/riscv/virt.rst index 08ce3c4177..1272b6659e 100644 --- a/docs/system/riscv/virt.rst +++ b/docs/system/riscv/virt.rst @@ -63,6 +63,22 @@ The following machine-specific options are supported: When this option is "on", ACLINT devices will be emulated instead of SiFive CLINT. When not specified, this option is assumed to be "off". +- aia=[none|aplic|aplic-imsic] + + This option allows selecting interrupt controller defined by the AIA + (advanced interrupt architecture) specification. The "aia=aplic" selects + APLIC (advanced platform level interrupt controller) to handle wired + interrupts whereas the "aia=aplic-imsic" selects APLIC and IMSIC (incoming + message signaled interrupt controller) to handle both wired interrupts and + MSIs. When not specified, this option is assumed to be "none" which selects + SiFive PLIC to handle wired interrupts. + +- aia-guests=nnn + + The number of per-HART VS-level AIA IMSIC pages to be emulated for a guest + having AIA IMSIC (i.e. "aia=aplic-imsic" selected). When not specified, + the default number of per-HART VS-level AIA IMSIC pages is 0. + Running Linux kernel -------------------- diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst index 878e6a5c5c..8b97592663 100644 --- a/docs/tools/qemu-storage-daemon.rst +++ b/docs/tools/qemu-storage-daemon.rst @@ -154,6 +154,13 @@ Standard options: created but before accepting connections. The daemon has started successfully when the pid file is written and clients may begin connecting. +.. option:: --daemonize + + Daemonize the process. The parent process will exit once startup is complete + (i.e., after the pid file has been or would have been written) or failure + occurs. Its exit code reflects whether the child has started up successfully + or failed to do so. 
+ Examples -------- Launch the daemon with QMP monitor socket ``qmp.sock`` so clients can execute diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index 8fd89f0447..4997677460 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -16,10 +16,17 @@ #include <dirent.h> #include <utime.h> -#include <sys/vfs.h> #include "qemu-fsdev-throttle.h" #include "p9array.h" +#ifdef CONFIG_LINUX +# include <sys/vfs.h> +#endif +#ifdef CONFIG_DARWIN +# include <sys/param.h> +# include <sys/mount.h> +#endif + #define SM_LOCAL_MODE_BITS 0600 #define SM_LOCAL_DIR_MODE_BITS 0700 diff --git a/fsdev/meson.build b/fsdev/meson.build index adf57cc43e..b632b66348 100644 --- a/fsdev/meson.build +++ b/fsdev/meson.build @@ -7,6 +7,7 @@ fsdev_ss.add(when: ['CONFIG_FSDEV_9P'], if_true: files( 'qemu-fsdev.c', ), if_false: files('qemu-fsdev-dummy.c')) softmmu_ss.add_all(when: 'CONFIG_LINUX', if_true: fsdev_ss) +softmmu_ss.add_all(when: 'CONFIG_DARWIN', if_true: fsdev_ss) if have_virtfs_proxy_helper executable('virtfs-proxy-helper', diff --git a/fsdev/p9array.h b/fsdev/p9array.h index 6aa25327ca..90e83a7c7b 100644 --- a/fsdev/p9array.h +++ b/fsdev/p9array.h @@ -81,11 +81,11 @@ */ /** - * Declares an array type for the passed @a scalar_type. + * P9ARRAY_DECLARE_TYPE() - Declares an array type for the passed @scalar_type. * - * This is typically used from a shared header file. + * @scalar_type: type of the individual array elements * - * @param scalar_type - type of the individual array elements + * This is typically used from a shared header file. */ #define P9ARRAY_DECLARE_TYPE(scalar_type) \ typedef struct P9Array##scalar_type { \ @@ -97,14 +97,14 @@ void p9array_auto_free_##scalar_type(scalar_type **auto_var); \ /** - * Defines an array type for the passed @a scalar_type and appropriate - * @a scalar_cleanup_func. + * P9ARRAY_DEFINE_TYPE() - Defines an array type for the passed @scalar_type + * and appropriate @scalar_cleanup_func. * - * This is typically used from a C unit file. 
+ * @scalar_type: type of the individual array elements + * @scalar_cleanup_func: appropriate function to free memory dynamically + * allocated by individual array elements before * - * @param scalar_type - type of the individual array elements - * @param scalar_cleanup_func - appropriate function to free memory dynamically - * allocated by individual array elements before + * This is typically used from a C unit file. */ #define P9ARRAY_DEFINE_TYPE(scalar_type, scalar_cleanup_func) \ void p9array_new_##scalar_type(scalar_type **auto_var, size_t len) \ @@ -132,23 +132,27 @@ } \ /** + * P9ARRAY_REF() - Declare a reference variable for an array. + * + * @scalar_type: type of the individual array elements + * * Used to declare a reference variable (unique pointer) for an array. After * leaving the scope of the reference variable, the associated array is * automatically freed. - * - * @param scalar_type - type of the individual array elements */ #define P9ARRAY_REF(scalar_type) \ __attribute((__cleanup__(p9array_auto_free_##scalar_type))) scalar_type* /** - * Allocates a new array of passed @a scalar_type with @a len number of array - * elements and assigns the created array to the reference variable - * @a auto_var. + * P9ARRAY_NEW() - Allocate a new array. * - * @param scalar_type - type of the individual array elements - * @param auto_var - destination reference variable - * @param len - amount of array elements to be allocated immediately + * @scalar_type: type of the individual array elements + * @auto_var: destination reference variable + * @len: amount of array elements to be allocated immediately + * + * Allocates a new array of passed @scalar_type with @len number of array + * elements and assigns the created array to the reference variable + * @auto_var. 
*/ #define P9ARRAY_NEW(scalar_type, auto_var, len) \ QEMU_BUILD_BUG_MSG( \ diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 210d9e7705..d42ce6d8b8 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -32,10 +32,12 @@ #include "qemu/error-report.h" #include "qemu/option.h" #include <libgen.h> +#ifdef CONFIG_LINUX #include <linux/fs.h> #ifdef CONFIG_LINUX_MAGIC_H #include <linux/magic.h> #endif +#endif #include <sys/ioctl.h> #ifndef XFS_SUPER_MAGIC @@ -560,6 +562,15 @@ again: if (!entry) { return NULL; } +#ifdef CONFIG_DARWIN + int off; + off = telldir(fs->dir.stream); + /* If telldir fails, fail the entire readdir call */ + if (off < 0) { + return NULL; + } + entry->d_seekoff = off; +#endif if (ctx->export_flags & V9FS_SM_MAPPED) { entry->d_type = DT_UNKNOWN; @@ -671,7 +682,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, if (fs_ctx->export_flags & V9FS_SM_MAPPED || fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - err = mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0); + err = qemu_mknodat(dirfd, name, fs_ctx->fmode | S_IFREG, 0); if (err == -1) { goto out; } @@ -686,7 +697,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, } } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || fs_ctx->export_flags & V9FS_SM_NONE) { - err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev); + err = qemu_mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev); if (err == -1) { goto out; } @@ -779,16 +790,20 @@ static int local_fstat(FsContext *fs_ctx, int fid_type, mode_t tmp_mode; dev_t tmp_dev; - if (fgetxattr(fd, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) { + if (qemu_fgetxattr(fd, "user.virtfs.uid", + &tmp_uid, sizeof(uid_t)) > 0) { stbuf->st_uid = le32_to_cpu(tmp_uid); } - if (fgetxattr(fd, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) { + if (qemu_fgetxattr(fd, "user.virtfs.gid", + &tmp_gid, sizeof(gid_t)) > 0) { stbuf->st_gid = le32_to_cpu(tmp_gid); } - if (fgetxattr(fd, "user.virtfs.mode", &tmp_mode, 
sizeof(mode_t)) > 0) { + if (qemu_fgetxattr(fd, "user.virtfs.mode", + &tmp_mode, sizeof(mode_t)) > 0) { stbuf->st_mode = le32_to_cpu(tmp_mode); } - if (fgetxattr(fd, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) { + if (qemu_fgetxattr(fd, "user.virtfs.rdev", + &tmp_dev, sizeof(dev_t)) > 0) { stbuf->st_rdev = le64_to_cpu(tmp_dev); } } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { diff --git a/hw/9pfs/9p-proxy.c b/hw/9pfs/9p-proxy.c index 09bd9f1464..8b4b5cf7dc 100644 --- a/hw/9pfs/9p-proxy.c +++ b/hw/9pfs/9p-proxy.c @@ -123,10 +123,16 @@ static void prstatfs_to_statfs(struct statfs *stfs, ProxyStatFS *prstfs) stfs->f_bavail = prstfs->f_bavail; stfs->f_files = prstfs->f_files; stfs->f_ffree = prstfs->f_ffree; +#ifdef CONFIG_DARWIN + /* f_namelen and f_frsize do not exist on Darwin */ + stfs->f_fsid.val[0] = prstfs->f_fsid[0] & 0xFFFFFFFFU; + stfs->f_fsid.val[1] = prstfs->f_fsid[1] >> 32 & 0xFFFFFFFFU; +#else stfs->f_fsid.__val[0] = prstfs->f_fsid[0] & 0xFFFFFFFFU; stfs->f_fsid.__val[1] = prstfs->f_fsid[1] >> 32 & 0xFFFFFFFFU; stfs->f_namelen = prstfs->f_namelen; stfs->f_frsize = prstfs->f_frsize; +#endif } /* Converts proxy_stat structure to VFS stat structure */ @@ -143,12 +149,24 @@ static void prstat_to_stat(struct stat *stbuf, ProxyStat *prstat) stbuf->st_size = prstat->st_size; stbuf->st_blksize = prstat->st_blksize; stbuf->st_blocks = prstat->st_blocks; + stbuf->st_atime = prstat->st_atim_sec; + stbuf->st_mtime = prstat->st_mtim_sec; + stbuf->st_ctime = prstat->st_ctim_sec; +#ifdef CONFIG_DARWIN + stbuf->st_atimespec.tv_sec = prstat->st_atim_sec; + stbuf->st_mtimespec.tv_sec = prstat->st_mtim_sec; + stbuf->st_ctimespec.tv_sec = prstat->st_ctim_sec; + stbuf->st_atimespec.tv_nsec = prstat->st_atim_nsec; + stbuf->st_mtimespec.tv_nsec = prstat->st_mtim_nsec; + stbuf->st_ctimespec.tv_nsec = prstat->st_ctim_nsec; +#else stbuf->st_atim.tv_sec = prstat->st_atim_sec; + stbuf->st_mtim.tv_sec = prstat->st_mtim_sec; + stbuf->st_ctim.tv_sec = 
prstat->st_ctim_sec; stbuf->st_atim.tv_nsec = prstat->st_atim_nsec; - stbuf->st_mtime = prstat->st_mtim_sec; stbuf->st_mtim.tv_nsec = prstat->st_mtim_nsec; - stbuf->st_ctime = prstat->st_ctim_sec; stbuf->st_ctim.tv_nsec = prstat->st_ctim_nsec; +#endif } /* @@ -688,7 +706,21 @@ static off_t proxy_telldir(FsContext *ctx, V9fsFidOpenState *fs) static struct dirent *proxy_readdir(FsContext *ctx, V9fsFidOpenState *fs) { - return readdir(fs->dir.stream); + struct dirent *entry; + entry = readdir(fs->dir.stream); +#ifdef CONFIG_DARWIN + if (!entry) { + return NULL; + } + int td; + td = telldir(fs->dir.stream); + /* If telldir fails, fail the entire readdir call */ + if (td < 0) { + return NULL; + } + entry->d_seekoff = td; +#endif + return entry; } static void proxy_seekdir(FsContext *ctx, V9fsFidOpenState *fs, off_t off) diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c index 7a7cd5c5ba..b3080e415b 100644 --- a/hw/9pfs/9p-synth.c +++ b/hw/9pfs/9p-synth.c @@ -234,7 +234,11 @@ static void synth_direntry(V9fsSynthNode *node, offsetof(struct dirent, d_name) + sz); memcpy(entry->d_name, node->name, sz); entry->d_ino = node->attr->inode; +#ifdef CONFIG_DARWIN + entry->d_seekoff = off + 1; +#else entry->d_off = off + 1; +#endif } static struct dirent *synth_get_dentry(V9fsSynthNode *dir, @@ -439,7 +443,9 @@ static int synth_statfs(FsContext *s, V9fsPath *fs_path, stbuf->f_bsize = 512; stbuf->f_blocks = 0; stbuf->f_files = synth_node_count; +#ifndef CONFIG_DARWIN stbuf->f_namelen = NAME_MAX; +#endif return 0; } diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c new file mode 100644 index 0000000000..bec0253474 --- /dev/null +++ b/hw/9pfs/9p-util-darwin.c @@ -0,0 +1,97 @@ +/* + * 9p utilities (Darwin Implementation) + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/xattr.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "9p-util.h" + +ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size) +{ + int ret; + int fd = openat_file(dirfd, filename, + O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0); + if (fd == -1) { + return -1; + } + ret = fgetxattr(fd, name, value, size, 0, 0); + close_preserve_errno(fd); + return ret; +} + +ssize_t flistxattrat_nofollow(int dirfd, const char *filename, + char *list, size_t size) +{ + int ret; + int fd = openat_file(dirfd, filename, + O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0); + if (fd == -1) { + return -1; + } + ret = flistxattr(fd, list, size, 0); + close_preserve_errno(fd); + return ret; +} + +ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, + const char *name) +{ + int ret; + int fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0); + if (fd == -1) { + return -1; + } + ret = fremovexattr(fd, name, 0); + close_preserve_errno(fd); + return ret; +} + +int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size, int flags) +{ + int ret; + int fd = openat_file(dirfd, filename, O_PATH_9P_UTIL | O_NOFOLLOW, 0); + if (fd == -1) { + return -1; + } + ret = fsetxattr(fd, name, value, size, 0, flags); + close_preserve_errno(fd); + return ret; +} + +/* + * As long as mknodat is not available on macOS, this workaround + * using pthread_fchdir_np is needed. 
+ * + * Radar filed with Apple for implementing mknodat: + * rdar://FB9862426 (https://openradar.appspot.com/FB9862426) + */ +#if defined CONFIG_PTHREAD_FCHDIR_NP + +int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev) +{ + int preserved_errno, err; + if (!pthread_fchdir_np) { + error_report_once("pthread_fchdir_np() not available on this version of macOS"); + /* Follow the mknodat() convention: callers test for -1 and read errno */ + errno = ENOTSUP; + return -1; + } + if (pthread_fchdir_np(dirfd) < 0) { + return -1; + } + err = mknod(filename, mode, dev); + preserved_errno = errno; + /* Stop using the thread-local cwd */ + pthread_fchdir_np(-1); + if (err < 0) { + errno = preserved_errno; + } + return err; +} + +#endif diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util-linux.c index 3221d9b498..db451b0784 100644 --- a/hw/9pfs/9p-util.c +++ b/hw/9pfs/9p-util-linux.c @@ -1,5 +1,5 @@ /* - * 9p utilities + * 9p utilities (Linux Implementation) * * Copyright IBM, Corp. 2017 * @@ -61,4 +61,10 @@ int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, ret = lsetxattr(proc_path, name, value, size, flags); g_free(proc_path); return ret; + +} + +int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev) +{ + return mknodat(dirfd, filename, mode, dev); } diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h index 546f46dc7d..97e681e167 100644 --- a/hw/9pfs/9p-util.h +++ b/hw/9pfs/9p-util.h @@ -19,6 +19,23 @@ #define O_PATH_9P_UTIL 0 #endif +#ifdef CONFIG_DARWIN +#define qemu_fgetxattr(...) fgetxattr(__VA_ARGS__, 0, 0) +#define qemu_lgetxattr(...) getxattr(__VA_ARGS__, 0, XATTR_NOFOLLOW) +#define qemu_llistxattr(...) listxattr(__VA_ARGS__, XATTR_NOFOLLOW) +#define qemu_lremovexattr(...)
removexattr(__VA_ARGS__, XATTR_NOFOLLOW) +static inline int qemu_lsetxattr(const char *path, const char *name, + const void *value, size_t size, int flags) { + return setxattr(path, name, value, size, 0, flags | XATTR_NOFOLLOW); +} +#else +#define qemu_fgetxattr fgetxattr +#define qemu_lgetxattr lgetxattr +#define qemu_llistxattr llistxattr +#define qemu_lremovexattr lremovexattr +#define qemu_lsetxattr lsetxattr +#endif + static inline void close_preserve_errno(int fd) { int serrno = errno; @@ -37,10 +54,13 @@ static inline int openat_file(int dirfd, const char *name, int flags, { int fd, serrno, ret; +#ifndef CONFIG_DARWIN again: +#endif fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK, mode); if (fd == -1) { +#ifndef CONFIG_DARWIN if (errno == EPERM && (flags & O_NOATIME)) { /* * The client passed O_NOATIME but we lack permissions to honor it. @@ -53,6 +73,7 @@ again: flags &= ~O_NOATIME; goto again; } +#endif return -1; } @@ -78,4 +99,61 @@ ssize_t flistxattrat_nofollow(int dirfd, const char *filename, ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, const char *name); +/* + * Darwin has d_seekoff, which appears to function similarly to d_off. + * However, it does not appear to be supported on all file systems, + * so ensure it is manually injected earlier and call here when + * needed. + */ +static inline off_t qemu_dirent_off(struct dirent *dent) +{ +#ifdef CONFIG_DARWIN + return dent->d_seekoff; +#else + return dent->d_off; +#endif +} + +/** + * qemu_dirent_dup() - Duplicate directory entry @dent. + * + * @dent: original directory entry to be duplicated + * Return: duplicated directory entry which should be freed with g_free() + * + * It is highly recommended to use this function instead of open coding + * duplication of dirent objects, because the actual struct dirent + * size may be bigger or shorter than sizeof(struct dirent) and correct + * handling is platform specific (see gitlab issue #841). 
+ */ +static inline struct dirent *qemu_dirent_dup(struct dirent *dent) +{ + size_t sz = 0; +#if defined _DIRENT_HAVE_D_RECLEN + /* Avoid use of strlen() if platform supports d_reclen. */ + sz = dent->d_reclen; +#endif + /* + * Test sz for zero even if d_reclen is available + * because some drivers may set d_reclen to zero. + */ + if (sz == 0) { + /* Fallback to the most portable way. */ + sz = offsetof(struct dirent, d_name) + + strlen(dent->d_name) + 1; + } + return g_memdup(dent, sz); +} + +/* + * As long as mknodat is not available on macOS, this workaround + * using pthread_fchdir_np is needed. qemu_mknodat is defined in + * 9p-util-darwin.c. pthread_fchdir_np is weakly linked here as a guard + * in case it disappears in future macOS versions, because it is + * a private API. + */ +#if defined CONFIG_DARWIN && defined CONFIG_PTHREAD_FCHDIR_NP +int pthread_fchdir_np(int fd) __attribute__((weak_import)); +#endif +int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev); + #endif diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 15b3f4d385..a6d6b3f835 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -27,12 +27,17 @@ #include "virtio-9p.h" #include "fsdev/qemu-fsdev.h" #include "9p-xattr.h" +#include "9p-util.h" #include "coth.h" #include "trace.h" #include "migration/blocker.h" #include "qemu/xxhash.h" #include <math.h> +#ifdef CONFIG_LINUX #include <linux/limits.h> +#else +#include <limits.h> +#endif int open_fd_hw; int total_open_fd; @@ -133,11 +138,20 @@ static int dotl_to_open_flags(int flags) { P9_DOTL_NONBLOCK, O_NONBLOCK } , { P9_DOTL_DSYNC, O_DSYNC }, { P9_DOTL_FASYNC, FASYNC }, +#ifndef CONFIG_DARWIN + { P9_DOTL_NOATIME, O_NOATIME }, + /* + * On Darwin, we could map to F_NOCACHE, which is + * similar, but doesn't quite have the same + * semantics. However, we don't support O_DIRECT + * even on linux at the moment, so we just ignore + * it here.
+ */ { P9_DOTL_DIRECT, O_DIRECT }, +#endif { P9_DOTL_LARGEFILE, O_LARGEFILE }, { P9_DOTL_DIRECTORY, O_DIRECTORY }, { P9_DOTL_NOFOLLOW, O_NOFOLLOW }, - { P9_DOTL_NOATIME, O_NOATIME }, { P9_DOTL_SYNC, O_SYNC }, }; @@ -166,10 +180,12 @@ static int get_dotl_openflags(V9fsState *s, int oflags) */ flags = dotl_to_open_flags(oflags); flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT); +#ifndef CONFIG_DARWIN /* * Ignore direct disk access hint until the server supports it. */ flags &= ~O_DIRECT; +#endif return flags; } @@ -612,8 +628,8 @@ static inline uint64_t mirror64bit(uint64_t value) ((uint64_t)mirror8bit((value >> 56) & 0xff)); } -/** - * @brief Parameter k for the Exponential Golomb algorihm to be used. +/* + * Parameter k for the Exponential Golomb algorithm to be used. * * The smaller this value, the smaller the minimum bit count for the Exp. * Golomb generated affixes will be (at lowest index) however for the @@ -626,28 +642,30 @@ static inline uint64_t mirror64bit(uint64_t value) * should be small, for a large amount of devices k might be increased * instead. The default of k=0 should be fine for most users though. * - * @b IMPORTANT: In case this ever becomes a runtime parameter; the value of + * IMPORTANT: In case this ever becomes a runtime parameter; the value of * k should not change as long as guest is still running! Because that would * cause completely different inode numbers to be generated on guest. */ #define EXP_GOLOMB_K 0 /** - * @brief Exponential Golomb algorithm for arbitrary k (including k=0). + * expGolombEncode() - Exponential Golomb algorithm for arbitrary k + * (including k=0). + * + * @n: natural number (or index) of the prefix to be generated + * (1, 2, 3, ...) + * @k: parameter k of Exp. Golomb algorithm to be used + * (see comment on EXP_GOLOMB_K macro for details about k) + * Return: prefix for given @n and @k * - * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!)
+ * The Exponential Golomb algorithm generates prefixes (NOT suffixes!) * with growing length and with the mathematical property of being * "prefix-free". The latter means the generated prefixes can be prepended * in front of arbitrary numbers and the resulting concatenated numbers are * guaranteed to be always unique. * * This is a minor adjustment to the original Exp. Golomb algorithm in the - * sense that lowest allowed index (@param n) starts with 1, not with zero. - * - * @param n - natural number (or index) of the prefix to be generated - * (1, 2, 3, ...) - * @param k - parameter k of Exp. Golomb algorithm to be used - * (see comment on EXP_GOLOMB_K macro for details about k) + * sense that lowest allowed index (@n) starts with 1, not with zero. */ static VariLenAffix expGolombEncode(uint64_t n, int k) { @@ -661,7 +679,9 @@ static VariLenAffix expGolombEncode(uint64_t n, int k) } /** - * @brief Converts a suffix into a prefix, or a prefix into a suffix. + * invertAffix() - Converts a suffix into a prefix, or a prefix into a suffix. + * @affix: either suffix or prefix to be inverted + * Return: inversion of passed @affix * * Simply mirror all bits of the affix value, for the purpose to preserve * respectively the mathematical "prefix-free" or "suffix-free" property @@ -685,16 +705,16 @@ static VariLenAffix invertAffix(const VariLenAffix *affix) } /** - * @brief Generates suffix numbers with "suffix-free" property. + * affixForIndex() - Generates suffix numbers with "suffix-free" property. + * @index: natural number (or index) of the suffix to be generated + * (1, 2, 3, ...) + * Return: Suffix suitable to assemble unique number. * * This is just a wrapper function on top of the Exp. Golomb algorithm. * * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes, * this function converts the Exp. Golomb prefixes into appropriate suffixes * which are still suitable for generating unique numbers. 
- * - * @param n - natural number (or index) of the suffix to be generated - * (1, 2, 3, ...) */ static VariLenAffix affixForIndex(uint64_t index) { @@ -794,8 +814,8 @@ static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev) return val->prefix_bits; } -/** - * @brief Slow / full mapping host inode nr -> guest inode nr. +/* + * Slow / full mapping host inode nr -> guest inode nr. * * This function performs a slower and much more costly remapping of an * original file inode number on host to an appropriate different inode @@ -807,7 +827,7 @@ static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev) * qid_path_suffixmap() failed. In practice this slow / full mapping is not * expected ever to be used at all though. * - * @see qid_path_suffixmap() for details + * See qid_path_suffixmap() for details * */ static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf, @@ -848,8 +868,8 @@ static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf, return 0; } -/** - * @brief Quick mapping host inode nr -> guest inode nr. +/* + * Quick mapping host inode nr -> guest inode nr. * * This function performs quick remapping of an original file inode number * on host to an appropriate different inode number on guest. This remapping @@ -1265,12 +1285,15 @@ static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path, /** - * Convert host filesystem's block size into an appropriate block size for - * 9p client (guest OS side). The value returned suggests an "optimum" block - * size for 9p I/O, i.e. to maximize performance. + * blksize_to_iounit() - Block size exposed to 9p client. + * Return: block size * * @pdu: 9p client request * @blksize: host filesystem's block size + * + * Convert host filesystem's block size into an appropriate block size for + * 9p client (guest OS side). The value returned suggests an "optimum" block + * size for 9p I/O, i.e. to maximize performance. 
*/ static int32_t blksize_to_iounit(const V9fsPDU *pdu, int32_t blksize) { @@ -1309,11 +1332,17 @@ static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf, v9lstat->st_blksize = stat_to_iounit(pdu, stbuf); v9lstat->st_blocks = stbuf->st_blocks; v9lstat->st_atime_sec = stbuf->st_atime; - v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec; v9lstat->st_mtime_sec = stbuf->st_mtime; - v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec; v9lstat->st_ctime_sec = stbuf->st_ctime; +#ifdef CONFIG_DARWIN + v9lstat->st_atime_nsec = stbuf->st_atimespec.tv_nsec; + v9lstat->st_mtime_nsec = stbuf->st_mtimespec.tv_nsec; + v9lstat->st_ctime_nsec = stbuf->st_ctimespec.tv_nsec; +#else + v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec; + v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec; v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec; +#endif /* Currently we only support BASIC fields in stat */ v9lstat->st_result_mask = P9_STATS_BASIC; @@ -2271,7 +2300,7 @@ static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu, count += len; v9fs_stat_free(&v9stat); v9fs_path_free(&path); - saved_dir_pos = dent->d_off; + saved_dir_pos = qemu_dirent_off(dent); } v9fs_readdir_unlock(&fidp->fs.dir); @@ -2376,10 +2405,11 @@ out_nofid: } /** - * Returns size required in Rreaddir response for the passed dirent @p name. + * v9fs_readdir_response_size() - Returns size required in Rreaddir response + * for the passed dirent @name. * - * @param name - directory entry's name (i.e. file name, directory name) - * @returns required size in bytes + * @name: directory entry's name (i.e. 
file name, directory name) + * Return: required size in bytes */ size_t v9fs_readdir_response_size(V9fsString *name) { @@ -2410,6 +2440,7 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, V9fsString name; int len, err = 0; int32_t count = 0; + off_t off; struct dirent *dent; struct stat *st; struct V9fsDirEnt *entries = NULL; @@ -2470,12 +2501,13 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, qid.version = 0; } + off = qemu_dirent_off(dent); v9fs_string_init(&name); v9fs_string_sprintf(&name, "%s", dent->d_name); /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */ len = pdu_marshal(pdu, 11 + count, "Qqbs", - &qid, dent->d_off, + &qid, off, dent->d_type, &name); v9fs_string_free(&name); @@ -3515,9 +3547,15 @@ static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf) f_bavail = stbuf->f_bavail / bsize_factor; f_files = stbuf->f_files; f_ffree = stbuf->f_ffree; +#ifdef CONFIG_DARWIN + fsid_val = (unsigned int)stbuf->f_fsid.val[0] | + (unsigned long long)stbuf->f_fsid.val[1] << 32; + f_namelen = NAME_MAX; +#else fsid_val = (unsigned int) stbuf->f_fsid.__val[0] | (unsigned long long)stbuf->f_fsid.__val[1] << 32; f_namelen = stbuf->f_namelen; +#endif return pdu_marshal(pdu, offset, "ddqqqqqqd", f_type, f_bsize, f_blocks, f_bfree, @@ -3919,7 +3957,7 @@ static void coroutine_fn v9fs_xattrcreate(void *opaque) rflags |= XATTR_REPLACE; } - if (size > XATTR_SIZE_MAX) { + if (size > P9_XATTR_SIZE_MAX) { err = -E2BIG; goto out_nofid; } diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index 1567b67841..af2635fae9 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -100,8 +100,8 @@ typedef enum P9ProtoVersion { V9FS_PROTO_2000L = 0x02, } P9ProtoVersion; -/** - * @brief Minimum message size supported by this 9pfs server. +/* + * Minimum message size supported by this 9pfs server. 
* * A client establishes a session by sending a Tversion request along with a * 'msize' parameter which suggests the server a maximum message size ever to be @@ -231,7 +231,7 @@ static inline void v9fs_readdir_init(P9ProtoVersion proto_version, V9fsDir *dir) } } -/** +/* * Type for 9p fs drivers' (a.k.a. 9p backends) result of readdir requests, * which is a chained list of directory entries. */ @@ -289,8 +289,8 @@ typedef enum AffixType_t { AffixType_Suffix, /* A.k.a. postfix. */ } AffixType_t; -/** - * @brief Unique affix of variable length. +/* + * Unique affix of variable length. * * An affix is (currently) either a suffix or a prefix, which is either * going to be prepended (prefix) or appended (suffix) with some other @@ -304,7 +304,7 @@ typedef struct VariLenAffix { AffixType_t type; /* Whether this affix is a suffix or a prefix. */ uint64_t value; /* Actual numerical value of this affix. */ /* - * Lenght of the affix, that is how many (of the lowest) bits of @c value + * Length of the affix, that is how many (of the lowest) bits of ``value`` * must be used for appending/prepending this affix to its final resulting, * unique number. */ @@ -479,4 +479,22 @@ struct V9fsTransport { void (*push_and_notify)(V9fsPDU *pdu); }; +#if defined(XATTR_SIZE_MAX) +/* Linux */ +#define P9_XATTR_SIZE_MAX XATTR_SIZE_MAX +#elif defined(CONFIG_DARWIN) +/* + * Darwin doesn't seem to define a maximum xattr size in its user + * space header, so manually configure it across platforms as 64k. + * + * Having no limit at all can lead to QEMU crashing during large g_malloc() + * calls. Because QEMU does not currently support macOS guests, the below + * preliminary solution only works due to its being a reflection of the limit of + * Linux guests.
+ */ +#define P9_XATTR_SIZE_MAX 65536 +#else +#error Missing definition for P9_XATTR_SIZE_MAX for this host system +#endif + #endif diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c index c0873bde16..75148bc985 100644 --- a/hw/9pfs/codir.c +++ b/hw/9pfs/codir.c @@ -22,6 +22,8 @@ #include "qemu/coroutine.h" #include "qemu/main-loop.h" #include "coth.h" +#include "9p-xattr.h" +#include "9p-util.h" /* * Intended to be called from bottom-half (e.g. background I/O thread) @@ -166,7 +168,7 @@ static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp, } size += len; - saved_dir_pos = dent->d_off; + saved_dir_pos = qemu_dirent_off(dent); } /* restore (last) saved position */ @@ -182,14 +184,25 @@ out: } /** - * @brief Reads multiple directory entries in one rush. + * v9fs_co_readdir_many() - Reads multiple directory entries in one rush. + * + * @pdu: the causing 9p (T_readdir) client request + * @fidp: already opened directory where readdir shall be performed on + * @entries: output for directory entries (must not be NULL) + * @offset: initial position inside the directory the function shall + * seek to before retrieving the directory entries + * @maxsize: maximum result message body size (in bytes) + * @dostat: whether a stat() should be performed and returned for + * each directory entry + * Return: resulting response message body size (in bytes) on success, + * negative error code otherwise * * Retrieves the requested (max. amount of) directory entries from the fs * driver. This function must only be called by the main IO thread (top half). * Internally this function call will be dispatched to a background IO thread * (bottom half) where it is eventually executed by the fs driver. * - * @discussion Acquiring multiple directory entries in one rush from the fs + * Acquiring multiple directory entries in one rush from the fs * driver, instead of retrieving each directory entry individually, is very * beneficial from performance point of view. 
Because for every fs driver * request latency is added, which in practice could lead to overall @@ -197,20 +210,9 @@ out: * directory) if every directory entry was individually requested from fs * driver. * - * @note You must @b ALWAYS call @c v9fs_free_dirents(entries) after calling + * NOTE: You must ALWAYS call v9fs_free_dirents(entries) after calling * v9fs_co_readdir_many(), both on success and on error cases of this - * function, to avoid memory leaks once @p entries are no longer needed. - * - * @param pdu - the causing 9p (T_readdir) client request - * @param fidp - already opened directory where readdir shall be performed on - * @param entries - output for directory entries (must not be NULL) - * @param offset - initial position inside the directory the function shall - * seek to before retrieving the directory entries - * @param maxsize - maximum result message body size (in bytes) - * @param dostat - whether a stat() should be performed and returned for - * each directory entry - * @returns resulting response message body size (in bytes) on success, - * negative error code otherwise + * function, to avoid memory leaks once @entries are no longer needed. */ int coroutine_fn v9fs_co_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp, struct V9fsDirEnt **entries, diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h index f83c7dda7b..1a1edbdc2a 100644 --- a/hw/9pfs/coth.h +++ b/hw/9pfs/coth.h @@ -19,7 +19,7 @@ #include "qemu/coroutine.h" #include "9p.h" -/** +/* * we want to use bottom half because we want to make sure the below * sequence of events. * @@ -29,7 +29,7 @@ * we cannot swap step 1 and 2, because that would imply worker thread * can enter coroutine while step1 is still running * - * @b PERFORMANCE @b CONSIDERATIONS: As a rule of thumb, keep in mind + * PERFORMANCE CONSIDERATIONS: As a rule of thumb, keep in mind * that hopping between threads adds @b latency! 
So when handling a * 9pfs request, avoid calling v9fs_co_run_in_worker() too often, because * this might otherwise sum up to a significant, huge overall latency for diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build index 99be5d9119..12443b6ad5 100644 --- a/hw/9pfs/meson.build +++ b/hw/9pfs/meson.build @@ -4,7 +4,6 @@ fs_ss.add(files( '9p-posix-acl.c', '9p-proxy.c', '9p-synth.c', - '9p-util.c', '9p-xattr-user.c', '9p-xattr.c', '9p.c', @@ -14,6 +13,8 @@ fs_ss.add(files( 'coth.c', 'coxattr.c', )) +fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c')) +fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c')) fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c')) softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 46bf7ceddf..46a42502bc 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -2102,6 +2102,10 @@ static void machvirt_init(MachineState *machine) object_property_set_bool(cpuobj, "pmu", false, NULL); } + if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) { + object_property_set_bool(cpuobj, "lpa2", false, NULL); + } + if (object_property_find(cpuobj, "reset-cbar")) { object_property_set_int(cpuobj, "reset-cbar", vms->memmap[VIRT_CPUPERIPHS].base, @@ -3020,8 +3024,11 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 0) static void virt_machine_6_2_options(MachineClass *mc) { + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + virt_machine_7_0_options(mc); compat_props_add(mc->compat_props, hw_compat_6_2, hw_compat_6_2_len); + vmc->no_tcg_lpa2 = true; } DEFINE_VIRT_MACHINE(6, 2) diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index 860787580a..2785b9e849 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -21,6 +21,7 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" +#include "qemu/memalign.h" #include "qapi/error.h" #include "hw/xen/xen_common.h" #include 
"hw/block/xen_blkif.h" diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 21d18ac2e3..347875a0cd 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -32,6 +32,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/timer.h" +#include "qemu/memalign.h" #include "hw/irq.h" #include "hw/isa/isa.h" #include "hw/qdev-properties.h" diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 81f9f971d8..74c7190302 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -1023,7 +1023,7 @@ static void postload_update_cb(void *opaque, bool running, RunState state) { PFlashCFI01 *pfl = opaque; - /* This is called after bdrv_invalidate_cache_all. */ + /* This is called after bdrv_activate_all. */ qemu_del_vm_change_state_handler(pfl->vmstate); pfl->vmstate = NULL; diff --git a/hw/display/edid-generate.c b/hw/display/edid-generate.c index bccf32af69..2cb819675e 100644 --- a/hw/display/edid-generate.c +++ b/hw/display/edid-generate.c @@ -255,33 +255,31 @@ static void edid_desc_dummy(uint8_t *desc) edid_desc_type(desc, 0x10); } -static void edid_desc_timing(uint8_t *desc, uint32_t refresh_rate, +static void edid_desc_timing(uint8_t *desc, const Timings *timings, uint32_t xres, uint32_t yres, uint32_t xmm, uint32_t ymm) { - Timings timings; - generate_timings(&timings, refresh_rate, xres, yres); - stl_le_p(desc, timings.clock); + stw_le_p(desc, timings->clock); desc[2] = xres & 0xff; - desc[3] = timings.xblank & 0xff; + desc[3] = timings->xblank & 0xff; desc[4] = (((xres & 0xf00) >> 4) | - ((timings.xblank & 0xf00) >> 8)); + ((timings->xblank & 0xf00) >> 8)); desc[5] = yres & 0xff; - desc[6] = timings.yblank & 0xff; + desc[6] = timings->yblank & 0xff; desc[7] = (((yres & 0xf00) >> 4) | - ((timings.yblank & 0xf00) >> 8)); + ((timings->yblank & 0xf00) >> 8)); - desc[8] = timings.xfront & 0xff; - desc[9] = timings.xsync & 0xff; + desc[8] = timings->xfront & 0xff; + desc[9] = timings->xsync & 0xff; - desc[10] = (((timings.yfront & 0x00f) << 4) 
| - ((timings.ysync & 0x00f) << 0)); - desc[11] = (((timings.xfront & 0x300) >> 2) | - ((timings.xsync & 0x300) >> 4) | - ((timings.yfront & 0x030) >> 2) | - ((timings.ysync & 0x030) >> 4)); + desc[10] = (((timings->yfront & 0x00f) << 4) | + ((timings->ysync & 0x00f) << 0)); + desc[11] = (((timings->xfront & 0x300) >> 2) | + ((timings->xsync & 0x300) >> 4) | + ((timings->yfront & 0x030) >> 2) | + ((timings->ysync & 0x030) >> 4)); desc[12] = xmm & 0xff; desc[13] = ymm & 0xff; @@ -348,13 +346,10 @@ static void init_displayid(uint8_t *did) edid_checksum(did + 1, did[2] + 4); } -static void qemu_displayid_generate(uint8_t *did, uint32_t refresh_rate, +static void qemu_displayid_generate(uint8_t *did, const Timings *timings, uint32_t xres, uint32_t yres, uint32_t xmm, uint32_t ymm) { - Timings timings; - generate_timings(&timings, refresh_rate, xres, yres); - did[0] = 0x70; /* display id extension */ did[1] = 0x13; /* version 1.3 */ did[2] = 23; /* length */ @@ -364,21 +359,21 @@ static void qemu_displayid_generate(uint8_t *did, uint32_t refresh_rate, did[6] = 0x00; /* revision */ did[7] = 0x14; /* block length */ - did[8] = timings.clock & 0xff; - did[9] = (timings.clock & 0xff00) >> 8; - did[10] = (timings.clock & 0xff0000) >> 16; + did[8] = timings->clock & 0xff; + did[9] = (timings->clock & 0xff00) >> 8; + did[10] = (timings->clock & 0xff0000) >> 16; did[11] = 0x88; /* leave aspect ratio undefined */ stw_le_p(did + 12, 0xffff & (xres - 1)); - stw_le_p(did + 14, 0xffff & (timings.xblank - 1)); - stw_le_p(did + 16, 0xffff & (timings.xfront - 1)); - stw_le_p(did + 18, 0xffff & (timings.xsync - 1)); + stw_le_p(did + 14, 0xffff & (timings->xblank - 1)); + stw_le_p(did + 16, 0xffff & (timings->xfront - 1)); + stw_le_p(did + 18, 0xffff & (timings->xsync - 1)); stw_le_p(did + 20, 0xffff & (yres - 1)); - stw_le_p(did + 22, 0xffff & (timings.yblank - 1)); - stw_le_p(did + 24, 0xffff & (timings.yfront - 1)); - stw_le_p(did + 26, 0xffff & (timings.ysync - 1)); + stw_le_p(did + 
22, 0xffff & (timings->yblank - 1)); + stw_le_p(did + 24, 0xffff & (timings->yfront - 1)); + stw_le_p(did + 26, 0xffff & (timings->ysync - 1)); edid_checksum(did + 1, did[2] + 4); } @@ -386,6 +381,7 @@ static void qemu_displayid_generate(uint8_t *did, uint32_t refresh_rate, void qemu_edid_generate(uint8_t *edid, size_t size, qemu_edid_info *info) { + Timings timings; uint8_t *desc = edid + 54; uint8_t *xtra3 = NULL; uint8_t *dta = NULL; @@ -409,9 +405,6 @@ void qemu_edid_generate(uint8_t *edid, size_t size, if (!info->prefy) { info->prefy = 800; } - if (info->prefx >= 4096 || info->prefy >= 4096) { - large_screen = 1; - } if (info->width_mm && info->height_mm) { width_mm = info->width_mm; height_mm = info->height_mm; @@ -421,6 +414,11 @@ void qemu_edid_generate(uint8_t *edid, size_t size, height_mm = qemu_edid_dpi_to_mm(dpi, info->prefy); } + generate_timings(&timings, refresh_rate, info->prefx, info->prefy); + if (info->prefx >= 4096 || info->prefy >= 4096 || timings.clock >= 65536) { + large_screen = 1; + } + /* =============== extensions =============== */ if (size >= 256) { @@ -501,7 +499,7 @@ void qemu_edid_generate(uint8_t *edid, size_t size, if (!large_screen) { /* The DTD section has only 12 bits to store the resolution */ - edid_desc_timing(desc, refresh_rate, info->prefx, info->prefy, + edid_desc_timing(desc, &timings, info->prefx, info->prefy, width_mm, height_mm); desc = edid_desc_next(edid, dta, desc); } @@ -536,7 +534,7 @@ void qemu_edid_generate(uint8_t *edid, size_t size, /* =============== display id extensions =============== */ if (did && large_screen) { - qemu_displayid_generate(did, refresh_rate, info->prefx, info->prefy, + qemu_displayid_generate(did, &timings, info->prefx, info->prefy, width_mm, height_mm); } diff --git a/hw/display/trace-events b/hw/display/trace-events index 4a687d1b8e..91efc88f04 100644 --- a/hw/display/trace-events +++ b/hw/display/trace-events @@ -21,6 +21,9 @@ vmware_palette_write(uint32_t index, uint32_t value) "index 
%d, value 0x%x" vmware_scratch_read(uint32_t index, uint32_t value) "index %d, value 0x%x" vmware_scratch_write(uint32_t index, uint32_t value) "index %d, value 0x%x" vmware_setmode(uint32_t w, uint32_t h, uint32_t bpp) "%dx%d @ %d bpp" +vmware_verify_rect_less_than_zero(const char *name, const char *param, int x) "%s: %s was < 0 (%d)" +vmware_verify_rect_greater_than_bound(const char *name, const char *param, int bound, int x) "%s: %s was > %d (%d)" +vmware_verify_rect_surface_bound_exceeded(const char *name, const char *component, int bound, const char *param1, int value1, const char *param2, int value2) "%s: %s > %d (%s: %d, %s: %d)" # virtio-gpu-base.c virtio_gpu_features(bool virgl) "virgl %d" diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c index e2969a6c81..0cc43a1f15 100644 --- a/hw/display/vmware_vga.c +++ b/hw/display/vmware_vga.c @@ -297,46 +297,52 @@ static inline bool vmsvga_verify_rect(DisplaySurface *surface, int x, int y, int w, int h) { if (x < 0) { - fprintf(stderr, "%s: x was < 0 (%d)\n", name, x); + trace_vmware_verify_rect_less_than_zero(name, "x", x); return false; } if (x > SVGA_MAX_WIDTH) { - fprintf(stderr, "%s: x was > %d (%d)\n", name, SVGA_MAX_WIDTH, x); + trace_vmware_verify_rect_greater_than_bound(name, "x", SVGA_MAX_WIDTH, + x); return false; } if (w < 0) { - fprintf(stderr, "%s: w was < 0 (%d)\n", name, w); + trace_vmware_verify_rect_less_than_zero(name, "w", w); return false; } if (w > SVGA_MAX_WIDTH) { - fprintf(stderr, "%s: w was > %d (%d)\n", name, SVGA_MAX_WIDTH, w); + trace_vmware_verify_rect_greater_than_bound(name, "w", SVGA_MAX_WIDTH, + w); return false; } if (x + w > surface_width(surface)) { - fprintf(stderr, "%s: width was > %d (x: %d, w: %d)\n", - name, surface_width(surface), x, w); + trace_vmware_verify_rect_surface_bound_exceeded(name, "width", + surface_width(surface), + "x", x, "w", w); return false; } if (y < 0) { - fprintf(stderr, "%s: y was < 0 (%d)\n", name, y); + 
trace_vmware_verify_rect_less_than_zero(name, "y", y); return false; } if (y > SVGA_MAX_HEIGHT) { - fprintf(stderr, "%s: y was > %d (%d)\n", name, SVGA_MAX_HEIGHT, y); + trace_vmware_verify_rect_greater_than_bound(name, "y", SVGA_MAX_HEIGHT, + y); return false; } if (h < 0) { - fprintf(stderr, "%s: h was < 0 (%d)\n", name, h); + trace_vmware_verify_rect_less_than_zero(name, "h", h); return false; } if (h > SVGA_MAX_HEIGHT) { - fprintf(stderr, "%s: h was > %d (%d)\n", name, SVGA_MAX_HEIGHT, h); + trace_vmware_verify_rect_greater_than_bound(name, "h", SVGA_MAX_HEIGHT, + h); return false; } if (y + h > surface_height(surface)) { - fprintf(stderr, "%s: update height > %d (y: %d, h: %d)\n", - name, surface_height(surface), y, h); + trace_vmware_verify_rect_surface_bound_exceeded(name, "height", + surface_height(surface), + "y", y, "h", h); return false; } diff --git a/hw/ide/core.c b/hw/ide/core.c index 33463d9b8f..3a5afff5d7 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -30,6 +30,7 @@ #include "qemu/main-loop.h" #include "qemu/timer.h" #include "qemu/hw-version.h" +#include "qemu/memalign.h" #include "sysemu/sysemu.h" #include "sysemu/blockdev.h" #include "sysemu/dma.h" @@ -434,12 +435,16 @@ static const AIOCBInfo trim_aiocb_info = { static void ide_trim_bh_cb(void *opaque) { TrimAIOCB *iocb = opaque; + BlockBackend *blk = iocb->s->blk; iocb->common.cb(iocb->common.opaque, iocb->ret); qemu_bh_delete(iocb->bh); iocb->bh = NULL; qemu_aio_unref(iocb); + + /* Paired with an increment in ide_issue_trim() */ + blk_dec_in_flight(blk); } static void ide_issue_trim_cb(void *opaque, int ret) @@ -509,6 +514,9 @@ BlockAIOCB *ide_issue_trim( IDEState *s = opaque; TrimAIOCB *iocb; + /* Paired with a decrement in ide_trim_bh_cb() */ + blk_inc_in_flight(s->blk); + iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); iocb->s = s; iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig index 528e77b4a6..ec8d4cec29 100644 ---
a/hw/intc/Kconfig +++ b/hw/intc/Kconfig @@ -73,6 +73,9 @@ config RISCV_ACLINT config RISCV_APLIC bool +config RISCV_IMSIC + bool + config SIFIVE_PLIC bool diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c index 6d3c8ee231..0b8f79a122 100644 --- a/hw/intc/arm_gicv3.c +++ b/hw/intc/arm_gicv3.c @@ -369,11 +369,19 @@ static const MemoryRegionOps gic_ops[] = { .read_with_attrs = gicv3_dist_read, .write_with_attrs = gicv3_dist_write, .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .impl.min_access_size = 1, + .impl.max_access_size = 8, }, { .read_with_attrs = gicv3_redist_read, .write_with_attrs = gicv3_redist_write, .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .impl.min_access_size = 1, + .impl.max_access_size = 8, } }; diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index d7e03d0cab..1a3d440a54 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -612,7 +612,8 @@ static uint64_t icv_hppir_read(CPUARMState *env, const ARMCPRegInfo *ri) } } - trace_gicv3_icv_hppir_read(grp, gicv3_redist_affid(cs), value); + trace_gicv3_icv_hppir_read(ri->crm == 8 ? 
0 : 1, + gicv3_redist_affid(cs), value); return value; } diff --git a/hw/intc/arm_gicv3_dist.c b/hw/intc/arm_gicv3_dist.c index 4164500ea9..28d913b211 100644 --- a/hw/intc/arm_gicv3_dist.c +++ b/hw/intc/arm_gicv3_dist.c @@ -838,7 +838,7 @@ MemTxResult gicv3_dist_read(void *opaque, hwaddr offset, uint64_t *data, if (!r) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest read at offset " TARGET_FMT_plx - "size %u\n", __func__, offset, size); + " size %u\n", __func__, offset, size); trace_gicv3_dist_badread(offset, size, attrs.secure); /* The spec requires that reserved registers are RAZ/WI; * so use MEMTX_ERROR returns from leaf functions as a way to @@ -879,7 +879,7 @@ MemTxResult gicv3_dist_write(void *opaque, hwaddr offset, uint64_t data, if (!r) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest write at offset " TARGET_FMT_plx - "size %u\n", __func__, offset, size); + " size %u\n", __func__, offset, size); trace_gicv3_dist_badwrite(offset, data, size, attrs.secure); /* The spec requires that reserved registers are RAZ/WI; * so use MEMTX_ERROR returns from leaf functions as a way to diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c index 4f598d3c14..b96b874afd 100644 --- a/hw/intc/arm_gicv3_its.c +++ b/hw/intc/arm_gicv3_its.c @@ -161,16 +161,22 @@ static MemTxResult get_cte(GICv3ITSState *s, uint16_t icid, CTEntry *cte) if (entry_addr == -1) { /* No L2 table entry, i.e. 
no valid CTE, or a memory error */ cte->valid = false; - return res; + goto out; } cteval = address_space_ldq_le(as, entry_addr, MEMTXATTRS_UNSPECIFIED, &res); if (res != MEMTX_OK) { - return res; + goto out; } cte->valid = FIELD_EX64(cteval, CTE, VALID); cte->rdbase = FIELD_EX64(cteval, CTE, RDBASE); - return MEMTX_OK; +out: + if (res != MEMTX_OK) { + trace_gicv3_its_cte_read_fault(icid); + } else { + trace_gicv3_its_cte_read(icid, cte->valid, cte->rdbase); + } + return res; } /* @@ -187,6 +193,10 @@ static bool update_ite(GICv3ITSState *s, uint32_t eventid, const DTEntry *dte, uint64_t itel = 0; uint32_t iteh = 0; + trace_gicv3_its_ite_write(dte->ittaddr, eventid, ite->valid, + ite->inttype, ite->intid, ite->icid, + ite->vpeid, ite->doorbell); + if (ite->valid) { itel = FIELD_DP64(itel, ITE_L, VALID, 1); itel = FIELD_DP64(itel, ITE_L, INTTYPE, ite->inttype); @@ -221,11 +231,13 @@ static MemTxResult get_ite(GICv3ITSState *s, uint32_t eventid, itel = address_space_ldq_le(as, iteaddr, MEMTXATTRS_UNSPECIFIED, &res); if (res != MEMTX_OK) { + trace_gicv3_its_ite_read_fault(dte->ittaddr, eventid); return res; } iteh = address_space_ldl_le(as, iteaddr + 8, MEMTXATTRS_UNSPECIFIED, &res); if (res != MEMTX_OK) { + trace_gicv3_its_ite_read_fault(dte->ittaddr, eventid); return res; } @@ -235,6 +247,9 @@ static MemTxResult get_ite(GICv3ITSState *s, uint32_t eventid, ite->icid = FIELD_EX64(itel, ITE_L, ICID); ite->vpeid = FIELD_EX64(itel, ITE_L, VPEID); ite->doorbell = FIELD_EX64(iteh, ITE_H, DOORBELL); + trace_gicv3_its_ite_read(dte->ittaddr, eventid, ite->valid, + ite->inttype, ite->intid, ite->icid, + ite->vpeid, ite->doorbell); return MEMTX_OK; } @@ -254,17 +269,23 @@ static MemTxResult get_dte(GICv3ITSState *s, uint32_t devid, DTEntry *dte) if (entry_addr == -1) { /* No L2 table entry, i.e. 
no valid DTE, or a memory error */ dte->valid = false; - return res; + goto out; } dteval = address_space_ldq_le(as, entry_addr, MEMTXATTRS_UNSPECIFIED, &res); if (res != MEMTX_OK) { - return res; + goto out; } dte->valid = FIELD_EX64(dteval, DTE, VALID); dte->size = FIELD_EX64(dteval, DTE, SIZE); /* DTE word field stores bits [51:8] of the ITT address */ dte->ittaddr = FIELD_EX64(dteval, DTE, ITTADDR) << ITTADDR_SHIFT; - return MEMTX_OK; +out: + if (res != MEMTX_OK) { + trace_gicv3_its_dte_read_fault(devid); + } else { + trace_gicv3_its_dte_read(devid, dte->valid, dte->size, dte->ittaddr); + } + return res; } /* @@ -366,6 +387,19 @@ static ItsCmdResult process_its_cmd(GICv3ITSState *s, const uint64_t *cmdpkt, devid = (cmdpkt[0] & DEVID_MASK) >> DEVID_SHIFT; eventid = cmdpkt[1] & EVENTID_MASK; + switch (cmd) { + case INTERRUPT: + trace_gicv3_its_cmd_int(devid, eventid); + break; + case CLEAR: + trace_gicv3_its_cmd_clear(devid, eventid); + break; + case DISCARD: + trace_gicv3_its_cmd_discard(devid, eventid); + break; + default: + g_assert_not_reached(); + } return do_process_its_cmd(s, devid, eventid, cmd); } @@ -382,15 +416,16 @@ static ItsCmdResult process_mapti(GICv3ITSState *s, const uint64_t *cmdpkt, devid = (cmdpkt[0] & DEVID_MASK) >> DEVID_SHIFT; eventid = cmdpkt[1] & EVENTID_MASK; + icid = cmdpkt[2] & ICID_MASK; if (ignore_pInt) { pIntid = eventid; + trace_gicv3_its_cmd_mapi(devid, eventid, icid); } else { pIntid = (cmdpkt[1] & pINTID_MASK) >> pINTID_SHIFT; + trace_gicv3_its_cmd_mapti(devid, eventid, icid, pIntid); } - icid = cmdpkt[2] & ICID_MASK; - if (devid >= s->dt.num_entries) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid command attributes: devid %d>=%d", @@ -451,6 +486,8 @@ static bool update_cte(GICv3ITSState *s, uint16_t icid, const CTEntry *cte) uint64_t cteval = 0; MemTxResult res = MEMTX_OK; + trace_gicv3_its_cte_write(icid, cte->valid, cte->rdbase); + if (cte->valid) { /* add mapping entry to collection table */ cteval = FIELD_DP64(cteval, CTE, 
VALID, 1); @@ -484,6 +521,7 @@ static ItsCmdResult process_mapc(GICv3ITSState *s, const uint64_t *cmdpkt) } else { cte.rdbase = 0; } + trace_gicv3_its_cmd_mapc(icid, cte.rdbase, cte.valid); if (icid >= s->ct.num_entries) { qemu_log_mask(LOG_GUEST_ERROR, "ITS MAPC: invalid ICID 0x%d", icid); @@ -509,6 +547,8 @@ static bool update_dte(GICv3ITSState *s, uint32_t devid, const DTEntry *dte) uint64_t dteval = 0; MemTxResult res = MEMTX_OK; + trace_gicv3_its_dte_write(devid, dte->valid, dte->size, dte->ittaddr); + if (dte->valid) { /* add mapping entry to device table */ dteval = FIELD_DP64(dteval, DTE, VALID, 1); @@ -539,6 +579,8 @@ static ItsCmdResult process_mapd(GICv3ITSState *s, const uint64_t *cmdpkt) dte.ittaddr = (cmdpkt[2] & ITTADDR_MASK) >> ITTADDR_SHIFT; dte.valid = cmdpkt[2] & CMD_FIELD_VALID_MASK; + trace_gicv3_its_cmd_mapd(devid, dte.size, dte.ittaddr, dte.valid); + if (devid >= s->dt.num_entries) { qemu_log_mask(LOG_GUEST_ERROR, "ITS MAPD: invalid device ID field 0x%x >= 0x%x\n", @@ -562,6 +604,8 @@ static ItsCmdResult process_movall(GICv3ITSState *s, const uint64_t *cmdpkt) rd1 = FIELD_EX64(cmdpkt[2], MOVALL_2, RDBASE1); rd2 = FIELD_EX64(cmdpkt[3], MOVALL_3, RDBASE2); + trace_gicv3_its_cmd_movall(rd1, rd2); + if (rd1 >= s->gicv3->num_cpu) { qemu_log_mask(LOG_GUEST_ERROR, "%s: RDBASE1 %" PRId64 @@ -601,6 +645,8 @@ static ItsCmdResult process_movi(GICv3ITSState *s, const uint64_t *cmdpkt) eventid = FIELD_EX64(cmdpkt[1], MOVI_1, EVENTID); new_icid = FIELD_EX64(cmdpkt[2], MOVI_2, ICID); + trace_gicv3_its_cmd_movi(devid, eventid, new_icid); + if (devid >= s->dt.num_entries) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid command attributes: devid %d>=%d", @@ -779,6 +825,7 @@ static void process_cmdq(GICv3ITSState *s) * is already consistent by the time SYNC command is executed. * Hence no further processing is required for SYNC command. 
*/ + trace_gicv3_its_cmd_sync(); break; case GITS_CMD_MAPD: result = process_mapd(s, cmdpkt); @@ -803,6 +850,7 @@ static void process_cmdq(GICv3ITSState *s) * need to trigger lpi priority re-calculation to be in * sync with LPI config table or pending table changes. */ + trace_gicv3_its_cmd_inv(); for (i = 0; i < s->gicv3->num_cpu; i++) { gicv3_redist_update_lpi(&s->gicv3->cpu[i]); } @@ -814,6 +862,7 @@ static void process_cmdq(GICv3ITSState *s) result = process_movall(s, cmdpkt); break; default: + trace_gicv3_its_cmd_unknown(cmd); break; } if (result == CMD_CONTINUE) { @@ -1264,7 +1313,7 @@ static MemTxResult gicv3_its_read(void *opaque, hwaddr offset, uint64_t *data, if (!result) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest read at offset " TARGET_FMT_plx - "size %u\n", __func__, offset, size); + " size %u\n", __func__, offset, size); trace_gicv3_its_badread(offset, size); /* * The spec requires that reserved registers are RAZ/WI; @@ -1300,7 +1349,7 @@ static MemTxResult gicv3_its_write(void *opaque, hwaddr offset, uint64_t data, if (!result) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest write at offset " TARGET_FMT_plx - "size %u\n", __func__, offset, size); + " size %u\n", __func__, offset, size); trace_gicv3_its_badwrite(offset, data, size); /* * The spec requires that reserved registers are RAZ/WI; diff --git a/hw/intc/meson.build b/hw/intc/meson.build index d953197413..81ccdb0d78 100644 --- a/hw/intc/meson.build +++ b/hw/intc/meson.build @@ -51,6 +51,7 @@ specific_ss.add(when: 'CONFIG_S390_FLIC_KVM', if_true: files('s390_flic_kvm.c')) specific_ss.add(when: 'CONFIG_SH_INTC', if_true: files('sh_intc.c')) specific_ss.add(when: 'CONFIG_RISCV_ACLINT', if_true: files('riscv_aclint.c')) specific_ss.add(when: 'CONFIG_RISCV_APLIC', if_true: files('riscv_aplic.c')) +specific_ss.add(when: 'CONFIG_RISCV_IMSIC', if_true: files('riscv_imsic.c')) specific_ss.add(when: 'CONFIG_SIFIVE_PLIC', if_true: files('sifive_plic.c')) specific_ss.add(when: 'CONFIG_XICS', 
if_true: files('xics.c', 'xive2.c')) specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_XICS'], diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c new file mode 100644 index 0000000000..8615e4cc1d --- /dev/null +++ b/hw/intc/riscv_imsic.c @@ -0,0 +1,448 @@ +/* + * RISC-V IMSIC (Incoming Message Signaled Interrupt Controller) + * + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qemu/error-report.h" +#include "qemu/bswap.h" +#include "exec/address-spaces.h" +#include "hw/sysbus.h" +#include "hw/pci/msi.h" +#include "hw/boards.h" +#include "hw/qdev-properties.h" +#include "hw/intc/riscv_imsic.h" +#include "hw/irq.h" +#include "target/riscv/cpu.h" +#include "target/riscv/cpu_bits.h" +#include "sysemu/sysemu.h" +#include "migration/vmstate.h" + +#define IMSIC_MMIO_PAGE_LE 0x00 +#define IMSIC_MMIO_PAGE_BE 0x04 + +#define IMSIC_MIN_ID ((IMSIC_EIPx_BITS * 2) - 1) +#define IMSIC_MAX_ID (IMSIC_TOPEI_IID_MASK) + +#define IMSIC_EISTATE_PENDING (1U << 0) +#define IMSIC_EISTATE_ENABLED (1U << 1) +#define IMSIC_EISTATE_ENPEND (IMSIC_EISTATE_ENABLED | \ + IMSIC_EISTATE_PENDING) + +static uint32_t riscv_imsic_topei(RISCVIMSICState *imsic, uint32_t page) +{ + uint32_t i, max_irq, base; + + base = page * imsic->num_irqs; + max_irq = (imsic->eithreshold[page] && + (imsic->eithreshold[page] <= imsic->num_irqs)) ? 
+ imsic->eithreshold[page] : imsic->num_irqs; + for (i = 1; i < max_irq; i++) { + if ((imsic->eistate[base + i] & IMSIC_EISTATE_ENPEND) == + IMSIC_EISTATE_ENPEND) { + return (i << IMSIC_TOPEI_IID_SHIFT) | i; + } + } + + return 0; +} + +static void riscv_imsic_update(RISCVIMSICState *imsic, uint32_t page) +{ + if (imsic->eidelivery[page] && riscv_imsic_topei(imsic, page)) { + qemu_irq_raise(imsic->external_irqs[page]); + } else { + qemu_irq_lower(imsic->external_irqs[page]); + } +} + +static int riscv_imsic_eidelivery_rmw(RISCVIMSICState *imsic, uint32_t page, + target_ulong *val, + target_ulong new_val, + target_ulong wr_mask) +{ + target_ulong old_val = imsic->eidelivery[page]; + + if (val) { + *val = old_val; + } + + wr_mask &= 0x1; + imsic->eidelivery[page] = (old_val & ~wr_mask) | (new_val & wr_mask); + + riscv_imsic_update(imsic, page); + return 0; +} + +static int riscv_imsic_eithreshold_rmw(RISCVIMSICState *imsic, uint32_t page, + target_ulong *val, + target_ulong new_val, + target_ulong wr_mask) +{ + target_ulong old_val = imsic->eithreshold[page]; + + if (val) { + *val = old_val; + } + + wr_mask &= IMSIC_MAX_ID; + imsic->eithreshold[page] = (old_val & ~wr_mask) | (new_val & wr_mask); + + riscv_imsic_update(imsic, page); + return 0; +} + +static int riscv_imsic_topei_rmw(RISCVIMSICState *imsic, uint32_t page, + target_ulong *val, target_ulong new_val, + target_ulong wr_mask) +{ + uint32_t base, topei = riscv_imsic_topei(imsic, page); + + /* Read pending and enabled interrupt with highest priority */ + if (val) { + *val = topei; + } + + /* Writes ignore value and clear top pending interrupt */ + if (topei && wr_mask) { + topei >>= IMSIC_TOPEI_IID_SHIFT; + base = page * imsic->num_irqs; + if (topei) { + imsic->eistate[base + topei] &= ~IMSIC_EISTATE_PENDING; + } + + riscv_imsic_update(imsic, page); + } + + return 0; +} + +static int riscv_imsic_eix_rmw(RISCVIMSICState *imsic, + uint32_t xlen, uint32_t page, + uint32_t num, bool pend, target_ulong *val, + 
target_ulong new_val, target_ulong wr_mask) +{ + uint32_t i, base; + target_ulong mask; + uint32_t state = (pend) ? IMSIC_EISTATE_PENDING : IMSIC_EISTATE_ENABLED; + + if (xlen != 32) { + if (num & 0x1) { + return -EINVAL; + } + num >>= 1; + } + if (num >= (imsic->num_irqs / xlen)) { + return -EINVAL; + } + + base = (page * imsic->num_irqs) + (num * xlen); + + if (val) { + *val = 0; + for (i = 0; i < xlen; i++) { + mask = (target_ulong)1 << i; + *val |= (imsic->eistate[base + i] & state) ? mask : 0; + } + } + + for (i = 0; i < xlen; i++) { + /* Bit0 of eip0 and eie0 are read-only zero */ + if (!num && !i) { + continue; + } + + mask = (target_ulong)1 << i; + if (wr_mask & mask) { + if (new_val & mask) { + imsic->eistate[base + i] |= state; + } else { + imsic->eistate[base + i] &= ~state; + } + } + } + + riscv_imsic_update(imsic, page); + return 0; +} + +static int riscv_imsic_rmw(void *arg, target_ulong reg, target_ulong *val, + target_ulong new_val, target_ulong wr_mask) +{ + RISCVIMSICState *imsic = arg; + uint32_t isel, priv, virt, vgein, xlen, page; + + priv = AIA_IREG_PRIV(reg); + virt = AIA_IREG_VIRT(reg); + isel = AIA_IREG_ISEL(reg); + vgein = AIA_IREG_VGEIN(reg); + xlen = AIA_IREG_XLEN(reg); + + if (imsic->mmode) { + if (priv == PRV_M && !virt) { + page = 0; + } else { + goto err; + } + } else { + if (priv == PRV_S) { + if (virt) { + if (vgein && vgein < imsic->num_pages) { + page = vgein; + } else { + goto err; + } + } else { + page = 0; + } + } else { + goto err; + } + } + + switch (isel) { + case ISELECT_IMSIC_EIDELIVERY: + return riscv_imsic_eidelivery_rmw(imsic, page, val, + new_val, wr_mask); + case ISELECT_IMSIC_EITHRESHOLD: + return riscv_imsic_eithreshold_rmw(imsic, page, val, + new_val, wr_mask); + case ISELECT_IMSIC_TOPEI: + return riscv_imsic_topei_rmw(imsic, page, val, new_val, wr_mask); + case ISELECT_IMSIC_EIP0 ... 
ISELECT_IMSIC_EIP63: + return riscv_imsic_eix_rmw(imsic, xlen, page, + isel - ISELECT_IMSIC_EIP0, + true, val, new_val, wr_mask); + case ISELECT_IMSIC_EIE0 ... ISELECT_IMSIC_EIE63: + return riscv_imsic_eix_rmw(imsic, xlen, page, + isel - ISELECT_IMSIC_EIE0, + false, val, new_val, wr_mask); + default: + break; + }; + +err: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Invalid register priv=%d virt=%d isel=%d vgein=%d\n", + __func__, priv, virt, isel, vgein); + return -EINVAL; +} + +static uint64_t riscv_imsic_read(void *opaque, hwaddr addr, unsigned size) +{ + RISCVIMSICState *imsic = opaque; + + /* Reads must be 4 byte words */ + if ((addr & 0x3) != 0) { + goto err; + } + + /* Reads cannot be out of range */ + if (addr > IMSIC_MMIO_SIZE(imsic->num_pages)) { + goto err; + } + + return 0; + +err: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Invalid register read 0x%" HWADDR_PRIx "\n", + __func__, addr); + return 0; +} + +static void riscv_imsic_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + RISCVIMSICState *imsic = opaque; + uint32_t page; + + /* Writes must be 4 byte words */ + if ((addr & 0x3) != 0) { + goto err; + } + + /* Writes cannot be out of range */ + if (addr > IMSIC_MMIO_SIZE(imsic->num_pages)) { + goto err; + } + + /* Writes only supported for MSI little-endian registers */ + page = addr >> IMSIC_MMIO_PAGE_SHIFT; + if ((addr & (IMSIC_MMIO_PAGE_SZ - 1)) == IMSIC_MMIO_PAGE_LE) { + if (value && (value < imsic->num_irqs)) { + imsic->eistate[(page * imsic->num_irqs) + value] |= + IMSIC_EISTATE_PENDING; + } + } + + /* Update CPU external interrupt status */ + riscv_imsic_update(imsic, page); + + return; + +err: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Invalid register write 0x%" HWADDR_PRIx "\n", + __func__, addr); +} + +static const MemoryRegionOps riscv_imsic_ops = { + .read = riscv_imsic_read, + .write = riscv_imsic_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4 + } +}; + +static void 
riscv_imsic_realize(DeviceState *dev, Error **errp) +{ + RISCVIMSICState *imsic = RISCV_IMSIC(dev); + RISCVCPU *rcpu = RISCV_CPU(qemu_get_cpu(imsic->hartid)); + CPUState *cpu = qemu_get_cpu(imsic->hartid); + CPURISCVState *env = cpu ? cpu->env_ptr : NULL; + + imsic->num_eistate = imsic->num_pages * imsic->num_irqs; + imsic->eidelivery = g_new0(uint32_t, imsic->num_pages); + imsic->eithreshold = g_new0(uint32_t, imsic->num_pages); + imsic->eistate = g_new0(uint32_t, imsic->num_eistate); + + memory_region_init_io(&imsic->mmio, OBJECT(dev), &riscv_imsic_ops, + imsic, TYPE_RISCV_IMSIC, + IMSIC_MMIO_SIZE(imsic->num_pages)); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &imsic->mmio); + + /* Claim the CPU interrupt to be triggered by this IMSIC */ + if (riscv_cpu_claim_interrupts(rcpu, + (imsic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) { + error_setg(errp, "%s already claimed", + (imsic->mmode) ? "MEIP" : "SEIP"); + return; + } + + /* Create output IRQ lines */ + imsic->external_irqs = g_malloc(sizeof(qemu_irq) * imsic->num_pages); + qdev_init_gpio_out(dev, imsic->external_irqs, imsic->num_pages); + + /* Force select AIA feature and setup CSR read-modify-write callback */ + if (env) { + riscv_set_feature(env, RISCV_FEATURE_AIA); + if (!imsic->mmode) { + riscv_cpu_set_geilen(env, imsic->num_pages - 1); + } + riscv_cpu_set_aia_ireg_rmw_fn(env, (imsic->mmode) ? 
PRV_M : PRV_S, + riscv_imsic_rmw, imsic); + } + + msi_nonbroken = true; +} + +static Property riscv_imsic_properties[] = { + DEFINE_PROP_BOOL("mmode", RISCVIMSICState, mmode, 0), + DEFINE_PROP_UINT32("hartid", RISCVIMSICState, hartid, 0), + DEFINE_PROP_UINT32("num-pages", RISCVIMSICState, num_pages, 0), + DEFINE_PROP_UINT32("num-irqs", RISCVIMSICState, num_irqs, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_riscv_imsic = { + .name = "riscv_imsic", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VARRAY_UINT32(eidelivery, RISCVIMSICState, + num_pages, 0, + vmstate_info_uint32, uint32_t), + VMSTATE_VARRAY_UINT32(eithreshold, RISCVIMSICState, + num_pages, 0, + vmstate_info_uint32, uint32_t), + VMSTATE_VARRAY_UINT32(eistate, RISCVIMSICState, + num_eistate, 0, + vmstate_info_uint32, uint32_t), + VMSTATE_END_OF_LIST() + } +}; + +static void riscv_imsic_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, riscv_imsic_properties); + dc->realize = riscv_imsic_realize; + dc->vmsd = &vmstate_riscv_imsic; +} + +static const TypeInfo riscv_imsic_info = { + .name = TYPE_RISCV_IMSIC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(RISCVIMSICState), + .class_init = riscv_imsic_class_init, +}; + +static void riscv_imsic_register_types(void) +{ + type_register_static(&riscv_imsic_info); +} + +type_init(riscv_imsic_register_types) + +/* + * Create IMSIC device. 
+ */ +DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode, + uint32_t num_pages, uint32_t num_ids) +{ + DeviceState *dev = qdev_new(TYPE_RISCV_IMSIC); + CPUState *cpu = qemu_get_cpu(hartid); + uint32_t i; + + assert(!(addr & (IMSIC_MMIO_PAGE_SZ - 1))); + if (mmode) { + assert(num_pages == 1); + } else { + assert(num_pages >= 1 && num_pages <= (IRQ_LOCAL_GUEST_MAX + 1)); + } + assert(IMSIC_MIN_ID <= num_ids); + assert(num_ids <= IMSIC_MAX_ID); + assert((num_ids & IMSIC_MIN_ID) == IMSIC_MIN_ID); + + qdev_prop_set_bit(dev, "mmode", mmode); + qdev_prop_set_uint32(dev, "hartid", hartid); + qdev_prop_set_uint32(dev, "num-pages", num_pages); + qdev_prop_set_uint32(dev, "num-irqs", num_ids + 1); + + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + + for (i = 0; i < num_pages; i++) { + if (!i) { + qdev_connect_gpio_out_named(dev, NULL, i, + qdev_get_gpio_in(DEVICE(cpu), + (mmode) ? IRQ_M_EXT : IRQ_S_EXT)); + } else { + qdev_connect_gpio_out_named(dev, NULL, i, + qdev_get_gpio_in(DEVICE(cpu), + IRQ_LOCAL_MAX + i - 1)); + } + } + + return dev; +} diff --git a/hw/intc/trace-events b/hw/intc/trace-events index b28cda4e08..53414aa197 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -176,6 +176,27 @@ gicv3_its_write(uint64_t offset, uint64_t data, unsigned size) "GICv3 ITS write: gicv3_its_badwrite(uint64_t offset, uint64_t data, unsigned size) "GICv3 ITS write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u: error" gicv3_its_translation_write(uint64_t offset, uint64_t data, unsigned size, uint32_t requester_id) "GICv3 ITS TRANSLATER write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u requester_id 0x%x" gicv3_its_process_command(uint32_t rd_offset, uint8_t cmd) "GICv3 ITS: processing command at offset 0x%x: 0x%x" +gicv3_its_cmd_int(uint32_t devid, uint32_t eventid) "GICv3 ITS: command INT DeviceID 0x%x EventID 0x%x" +gicv3_its_cmd_clear(uint32_t devid, uint32_t eventid) "GICv3 
ITS: command CLEAR DeviceID 0x%x EventID 0x%x" +gicv3_its_cmd_discard(uint32_t devid, uint32_t eventid) "GICv3 ITS: command DISCARD DeviceID 0x%x EventID 0x%x" +gicv3_its_cmd_sync(void) "GICv3 ITS: command SYNC" +gicv3_its_cmd_mapd(uint32_t devid, uint32_t size, uint64_t ittaddr, int valid) "GICv3 ITS: command MAPD DeviceID 0x%x Size 0x%x ITT_addr 0x%" PRIx64 " V %d" +gicv3_its_cmd_mapc(uint32_t icid, uint64_t rdbase, int valid) "GICv3 ITS: command MAPC ICID 0x%x RDbase 0x%" PRIx64 " V %d" +gicv3_its_cmd_mapi(uint32_t devid, uint32_t eventid, uint32_t icid) "GICv3 ITS: command MAPI DeviceID 0x%x EventID 0x%x ICID 0x%x" +gicv3_its_cmd_mapti(uint32_t devid, uint32_t eventid, uint32_t icid, uint32_t intid) "GICv3 ITS: command MAPTI DeviceID 0x%x EventID 0x%x ICID 0x%x pINTID 0x%x" +gicv3_its_cmd_inv(void) "GICv3 ITS: command INV or INVALL" +gicv3_its_cmd_movall(uint64_t rd1, uint64_t rd2) "GICv3 ITS: command MOVALL RDbase1 0x%" PRIx64 " RDbase2 0x%" PRIx64 +gicv3_its_cmd_movi(uint32_t devid, uint32_t eventid, uint32_t icid) "GICv3 ITS: command MOVI DeviceID 0x%x EventID 0x%x ICID 0x%x" +gicv3_its_cmd_unknown(unsigned cmd) "GICv3 ITS: unknown command 0x%x" +gicv3_its_cte_read(uint32_t icid, int valid, uint32_t rdbase) "GICv3 ITS: Collection Table read for ICID 0x%x: valid %d RDBase 0x%x" +gicv3_its_cte_write(uint32_t icid, int valid, uint32_t rdbase) "GICv3 ITS: Collection Table write for ICID 0x%x: valid %d RDBase 0x%x" +gicv3_its_cte_read_fault(uint32_t icid) "GICv3 ITS: Collection Table read for ICID 0x%x: faulted" +gicv3_its_ite_read(uint64_t ittaddr, uint32_t eventid, int valid, int inttype, uint32_t intid, uint32_t icid, uint32_t vpeid, uint32_t doorbell) "GICv3 ITS: Interrupt Table read for ITTaddr 0x%" PRIx64 " EventID 0x%x: valid %d inttype %d intid 0x%x ICID 0x%x vPEID 0x%x doorbell 0x%x" +gicv3_its_ite_read_fault(uint64_t ittaddr, uint32_t eventid) "GICv3 ITS: Interrupt Table read for ITTaddr 0x%" PRIx64 " EventID 0x%x: faulted" +gicv3_its_ite_write(uint64_t 
ittaddr, uint32_t eventid, int valid, int inttype, uint32_t intid, uint32_t icid, uint32_t vpeid, uint32_t doorbell) "GICv3 ITS: Interrupt Table write for ITTaddr 0x%" PRIx64 " EventID 0x%x: valid %d inttype %d intid 0x%x ICID 0x%x vPEID 0x%x doorbell 0x%x" +gicv3_its_dte_read(uint32_t devid, int valid, uint32_t size, uint64_t ittaddr) "GICv3 ITS: Device Table read for DeviceID 0x%x: valid %d size 0x%x ITTaddr 0x%" PRIx64 +gicv3_its_dte_write(uint32_t devid, int valid, uint32_t size, uint64_t ittaddr) "GICv3 ITS: Device Table write for DeviceID 0x%x: valid %d size 0x%x ITTaddr 0x%" PRIx64 +gicv3_its_dte_read_fault(uint32_t devid) "GICv3 ITS: Device Table read for DeviceID 0x%x: faulted" # armv7m_nvic.c nvic_recompute_state(int vectpending, int vectpending_prio, int exception_prio) "NVIC state recomputed: vectpending %d vectpending_prio %d exception_prio %d" diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 98aac98bef..03760ddeae 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -163,6 +163,7 @@ #include "migration/vmstate.h" #include "nvme.h" +#include "dif.h" #include "trace.h" #define NVME_MAX_IOQPAIRS 0xffff @@ -195,6 +196,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = { [NVME_WRITE_ATOMICITY] = true, [NVME_ASYNCHRONOUS_EVENT_CONF] = true, [NVME_TIMESTAMP] = true, + [NVME_HOST_BEHAVIOR_SUPPORT] = true, [NVME_COMMAND_SET_PROFILE] = true, }; @@ -205,6 +207,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE, [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE, [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE, + [NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE, [NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE, }; @@ -1065,7 +1068,8 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) size_t len = nvme_l2b(ns, nlb); uint16_t status; - if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) { + if (nvme_ns_ext(ns) && + !(pi && pract && ns->lbaf.ms == 
nvme_pi_tuple_size(ns))) { NvmeSg sg; len += nvme_m2b(ns, nlb); @@ -1244,7 +1248,8 @@ uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len, bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps); bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT); - if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) { + if (nvme_ns_ext(ns) && + !(pi && pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) { return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz, ns->lbaf.ms, 0, dir); } @@ -2045,9 +2050,12 @@ static void nvme_verify_cb(void *opaque, int ret) uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); + uint64_t reftag = le32_to_cpu(rw->reftag); + uint64_t cdw3 = le32_to_cpu(rw->cdw3); uint16_t status; + reftag |= cdw3 << 32; + trace_pci_nvme_verify_cb(nvme_cid(req), prinfo, apptag, appmask, reftag); if (ret) { @@ -2136,7 +2144,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); + uint64_t reftag = le32_to_cpu(rw->reftag); + uint64_t cdw3 = le32_to_cpu(rw->cdw3); struct nvme_compare_ctx *ctx = req->opaque; g_autofree uint8_t *buf = NULL; BlockBackend *blk = ns->blkconf.blk; @@ -2144,6 +2153,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) BlockAcctStats *stats = blk_get_stats(blk); uint16_t status = NVME_SUCCESS; + reftag |= cdw3 << 32; + trace_pci_nvme_compare_mdata_cb(nvme_cid(req)); if (ret) { @@ -2181,7 +2192,7 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) * tuple. 
*/ if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - nvme_pi_tuple_size(ns); } for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms, mbufp += ns->lbaf.ms) { @@ -2522,7 +2533,8 @@ typedef struct NvmeCopyAIOCB { QEMUBH *bh; int ret; - NvmeCopySourceRange *ranges; + void *ranges; + unsigned int format; int nr; int idx; @@ -2533,7 +2545,7 @@ typedef struct NvmeCopyAIOCB { BlockAcctCookie write; } acct; - uint32_t reftag; + uint64_t reftag; uint64_t slba; NvmeZone *zone; @@ -2587,13 +2599,101 @@ static void nvme_copy_bh(void *opaque) static void nvme_copy_cb(void *opaque, int ret); +static void nvme_copy_source_range_parse_format0(void *ranges, int idx, + uint64_t *slba, uint32_t *nlb, + uint16_t *apptag, + uint16_t *appmask, + uint64_t *reftag) +{ + NvmeCopySourceRangeFormat0 *_ranges = ranges; + + if (slba) { + *slba = le64_to_cpu(_ranges[idx].slba); + } + + if (nlb) { + *nlb = le16_to_cpu(_ranges[idx].nlb) + 1; + } + + if (apptag) { + *apptag = le16_to_cpu(_ranges[idx].apptag); + } + + if (appmask) { + *appmask = le16_to_cpu(_ranges[idx].appmask); + } + + if (reftag) { + *reftag = le32_to_cpu(_ranges[idx].reftag); + } +} + +static void nvme_copy_source_range_parse_format1(void *ranges, int idx, + uint64_t *slba, uint32_t *nlb, + uint16_t *apptag, + uint16_t *appmask, + uint64_t *reftag) +{ + NvmeCopySourceRangeFormat1 *_ranges = ranges; + + if (slba) { + *slba = le64_to_cpu(_ranges[idx].slba); + } + + if (nlb) { + *nlb = le16_to_cpu(_ranges[idx].nlb) + 1; + } + + if (apptag) { + *apptag = le16_to_cpu(_ranges[idx].apptag); + } + + if (appmask) { + *appmask = le16_to_cpu(_ranges[idx].appmask); + } + + if (reftag) { + *reftag = 0; + + *reftag |= (uint64_t)_ranges[idx].sr[4] << 40; + *reftag |= (uint64_t)_ranges[idx].sr[5] << 32; + *reftag |= (uint64_t)_ranges[idx].sr[6] << 24; + *reftag |= (uint64_t)_ranges[idx].sr[7] << 16; + *reftag |= (uint64_t)_ranges[idx].sr[8] << 8; + *reftag |= 
(uint64_t)_ranges[idx].sr[9]; + } +} + +static void nvme_copy_source_range_parse(void *ranges, int idx, uint8_t format, + uint64_t *slba, uint32_t *nlb, + uint16_t *apptag, uint16_t *appmask, + uint64_t *reftag) +{ + switch (format) { + case NVME_COPY_FORMAT_0: + nvme_copy_source_range_parse_format0(ranges, idx, slba, nlb, apptag, + appmask, reftag); + break; + + case NVME_COPY_FORMAT_1: + nvme_copy_source_range_parse_format1(ranges, idx, slba, nlb, apptag, + appmask, reftag); + break; + + default: + abort(); + } +} + static void nvme_copy_out_completed_cb(void *opaque, int ret) { NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - NvmeCopySourceRange *range = &iocb->ranges[iocb->idx]; - uint32_t nlb = le32_to_cpu(range->nlb) + 1; + uint32_t nlb; + + nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL, + &nlb, NULL, NULL, NULL); if (ret < 0) { iocb->ret = ret; @@ -2617,7 +2717,6 @@ static void nvme_copy_out_cb(void *opaque, int ret) NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - NvmeCopySourceRange *range; uint32_t nlb; size_t mlen; uint8_t *mbounce; @@ -2634,8 +2733,8 @@ static void nvme_copy_out_cb(void *opaque, int ret) return; } - range = &iocb->ranges[iocb->idx]; - nlb = le32_to_cpu(range->nlb) + 1; + nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, NULL, + &nlb, NULL, NULL, NULL); mlen = nvme_m2b(ns, nlb); mbounce = iocb->bounce + nvme_l2b(ns, nlb); @@ -2658,8 +2757,10 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - NvmeCopySourceRange *range; uint32_t nlb; + uint64_t slba; + uint16_t apptag, appmask; + uint64_t reftag; size_t len; uint16_t status; @@ -2670,8 +2771,8 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) goto out; } - range = &iocb->ranges[iocb->idx]; - nlb = le32_to_cpu(range->nlb) + 1; + 
nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba, + &nlb, &apptag, &appmask, &reftag); len = nvme_l2b(ns, nlb); trace_pci_nvme_copy_out(iocb->slba, nlb); @@ -2682,11 +2783,6 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) uint16_t prinfor = ((copy->control[0] >> 4) & 0xf); uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); - uint16_t apptag = le16_to_cpu(range->apptag); - uint16_t appmask = le16_to_cpu(range->appmask); - uint32_t reftag = le32_to_cpu(range->reftag); - - uint64_t slba = le64_to_cpu(range->slba); size_t mlen = nvme_m2b(ns, nlb); uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb); @@ -2759,7 +2855,6 @@ static void nvme_copy_in_cb(void *opaque, int ret) NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - NvmeCopySourceRange *range; uint64_t slba; uint32_t nlb; @@ -2775,9 +2870,8 @@ static void nvme_copy_in_cb(void *opaque, int ret) return; } - range = &iocb->ranges[iocb->idx]; - slba = le64_to_cpu(range->slba); - nlb = le32_to_cpu(range->nlb) + 1; + nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba, + &nlb, NULL, NULL, NULL); qemu_iovec_reset(&iocb->iov); qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(ns, nlb), @@ -2797,7 +2891,6 @@ static void nvme_copy_cb(void *opaque, int ret) NvmeCopyAIOCB *iocb = opaque; NvmeRequest *req = iocb->req; NvmeNamespace *ns = req->ns; - NvmeCopySourceRange *range; uint64_t slba; uint32_t nlb; size_t len; @@ -2814,9 +2907,8 @@ static void nvme_copy_cb(void *opaque, int ret) goto done; } - range = &iocb->ranges[iocb->idx]; - slba = le64_to_cpu(range->slba); - nlb = le32_to_cpu(range->nlb) + 1; + nvme_copy_source_range_parse(iocb->ranges, iocb->idx, iocb->format, &slba, + &nlb, NULL, NULL, NULL); len = nvme_l2b(ns, nlb); trace_pci_nvme_copy_source_range(slba, nlb); @@ -2872,6 +2964,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) uint8_t format = copy->control[0] & 0xf; uint16_t prinfor = 
((copy->control[0] >> 4) & 0xf); uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); + size_t len = sizeof(NvmeCopySourceRangeFormat0); uint16_t status; @@ -2897,10 +2990,18 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) goto invalid; } - iocb->ranges = g_new(NvmeCopySourceRange, nr); + if (ns->pif && format != 0x1) { + status = NVME_INVALID_FORMAT | NVME_DNR; + goto invalid; + } - status = nvme_h2c(n, (uint8_t *)iocb->ranges, - sizeof(NvmeCopySourceRange) * nr, req); + if (ns->pif) { + len = sizeof(NvmeCopySourceRangeFormat1); + } + + iocb->format = format; + iocb->ranges = g_malloc_n(nr, len); + status = nvme_h2c(n, (uint8_t *)iocb->ranges, len * nr, req); if (status) { goto invalid; } @@ -2926,6 +3027,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) iocb->nr = nr; iocb->idx = 0; iocb->reftag = le32_to_cpu(copy->reftag); + iocb->reftag |= (uint64_t)le32_to_cpu(copy->cdw3) << 32; iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl), ns->lbasz + ns->lbaf.ms); @@ -3164,7 +3266,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { bool pract = prinfo & NVME_PRINFO_PRACT; - if (pract && ns->lbaf.ms == 8) { + if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) { mapped_size = data_size; } } @@ -3241,7 +3343,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { bool pract = prinfo & NVME_PRINFO_PRACT; - if (pract && ns->lbaf.ms == 8) { + if (pract && ns->lbaf.ms == nvme_pi_tuple_size(ns)) { mapped_size -= nvme_m2b(ns, nlb); } } @@ -4712,7 +4814,8 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, } if (c->csi == NVME_CSI_NVM) { - return nvme_rpt_empty_id_struct(n, req); + return nvme_c2h(n, (uint8_t *)&ns->id_ns_nvm, sizeof(NvmeIdNsNvm), + req); } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) { return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned), req); @@ -5090,6 +5193,9 @@ 
static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) goto out; case NVME_TIMESTAMP: return nvme_get_feature_timestamp(n, req); + case NVME_HOST_BEHAVIOR_SUPPORT: + return nvme_c2h(n, (uint8_t *)&n->features.hbs, + sizeof(n->features.hbs), req); default: break; } @@ -5159,6 +5265,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) uint32_t nsid = le32_to_cpu(cmd->nsid); uint8_t fid = NVME_GETSETFEAT_FID(dw10); uint8_t save = NVME_SETFEAT_SAVE(dw10); + uint16_t status; int i; trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11); @@ -5280,6 +5387,27 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) break; case NVME_TIMESTAMP: return nvme_set_feature_timestamp(n, req); + case NVME_HOST_BEHAVIOR_SUPPORT: + status = nvme_h2c(n, (uint8_t *)&n->features.hbs, + sizeof(n->features.hbs), req); + if (status) { + return status; + } + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + + if (!ns) { + continue; + } + + ns->id_ns.nlbaf = ns->nlbaf - 1; + if (!n->features.hbs.lbafee) { + ns->id_ns.nlbaf = MIN(ns->id_ns.nlbaf, 15); + } + } + + return status; case NVME_COMMAND_SET_PROFILE: if (dw11 & 0x1ff) { trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff); @@ -5443,6 +5571,11 @@ typedef struct NvmeFormatAIOCB { uint32_t nsid; bool broadcast; int64_t offset; + + uint8_t lbaf; + uint8_t mset; + uint8_t pi; + uint8_t pil; } NvmeFormatAIOCB; static void nvme_format_bh(void *opaque); @@ -5462,18 +5595,16 @@ static const AIOCBInfo nvme_format_aiocb_info = { .get_aio_context = nvme_get_aio_context, }; -static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd) +static void nvme_format_set(NvmeNamespace *ns, uint8_t lbaf, uint8_t mset, + uint8_t pi, uint8_t pil) { - uint32_t dw10 = le32_to_cpu(cmd->cdw10); - uint8_t lbaf = dw10 & 0xf; - uint8_t pi = (dw10 >> 5) & 0x7; - uint8_t mset = (dw10 >> 4) & 0x1; - uint8_t pil = (dw10 >> 8) & 0x1; + uint8_t lbafl = lbaf & 0xf; + uint8_t lbafu = lbaf >> 4; 
trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil); ns->id_ns.dps = (pil << 3) | pi; - ns->id_ns.flbas = lbaf | (mset << 4); + ns->id_ns.flbas = (lbafu << 5) | (mset << 4) | lbafl; nvme_ns_init_format(ns); } @@ -5481,7 +5612,6 @@ static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd) static void nvme_format_ns_cb(void *opaque, int ret) { NvmeFormatAIOCB *iocb = opaque; - NvmeRequest *req = iocb->req; NvmeNamespace *ns = iocb->ns; int bytes; @@ -5503,7 +5633,7 @@ static void nvme_format_ns_cb(void *opaque, int ret) return; } - nvme_format_set(ns, &req->cmd); + nvme_format_set(ns, iocb->lbaf, iocb->mset, iocb->pi, iocb->pil); ns->status = 0x0; iocb->ns = NULL; iocb->offset = 0; @@ -5523,7 +5653,7 @@ static uint16_t nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi) return NVME_INVALID_FORMAT | NVME_DNR; } - if (pi && (ns->id_ns.lbaf[lbaf].ms < sizeof(NvmeDifTuple))) { + if (pi && (ns->id_ns.lbaf[lbaf].ms < nvme_pi_tuple_size(ns))) { return NVME_INVALID_FORMAT | NVME_DNR; } @@ -5586,6 +5716,12 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) { NvmeFormatAIOCB *iocb; uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint8_t lbaf = dw10 & 0xf; + uint8_t mset = (dw10 >> 4) & 0x1; + uint8_t pi = (dw10 >> 5) & 0x7; + uint8_t pil = (dw10 >> 8) & 0x1; + uint8_t lbafu = (dw10 >> 12) & 0x3; uint16_t status; iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req); @@ -5595,9 +5731,17 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) iocb->ret = 0; iocb->ns = NULL; iocb->nsid = 0; + iocb->lbaf = lbaf; + iocb->mset = mset; + iocb->pi = pi; + iocb->pil = pil; iocb->broadcast = (nsid == NVME_NSID_BROADCAST); iocb->offset = 0; + if (n->features.hbs.lbafee) { + iocb->lbaf |= lbafu << 4; + } + if (!iocb->broadcast) { if (!nvme_nsid_valid(n, nsid)) { status = NVME_INVALID_NSID | NVME_DNR; @@ -6573,6 +6717,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) 
id->cntlid = cpu_to_le16(n->cntlid); id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR); + id->ctratt |= cpu_to_le32(NVME_CTRATT_ELBAS); id->rab = 6; @@ -6627,7 +6772,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) */ id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT; - id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0); + id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1); id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN | NVME_CTRL_SGLS_BITBUCKET); diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c index 5dbd18b2a4..62d885f83e 100644 --- a/hw/nvme/dif.c +++ b/hw/nvme/dif.c @@ -13,13 +13,16 @@ #include "sysemu/block-backend.h" #include "nvme.h" +#include "dif.h" #include "trace.h" uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, - uint32_t reftag) + uint64_t reftag) { + uint64_t mask = ns->pif ? 0xffffffffffff : 0xffffffff; + if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) && - (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) { + (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & mask) != reftag) { return NVME_INVALID_PROT_INFO | NVME_DNR; } @@ -27,43 +30,58 @@ uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, } /* from Linux kernel (crypto/crct10dif_common.c) */ -static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer, - size_t len) +static uint16_t crc16_t10dif(uint16_t crc, const unsigned char *buffer, + size_t len) { unsigned int i; for (i = 0; i < len; i++) { - crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; + crc = (crc << 8) ^ crc16_t10dif_table[((crc >> 8) ^ buffer[i]) & 0xff]; } return crc; } -void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t apptag, - uint32_t *reftag) +/* from Linux kernel (lib/crc64.c) */ +static uint64_t crc64_nvme(uint64_t crc, const unsigned char *buffer, + size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) 
{ + crc = (crc >> 8) ^ crc64_nvme_table[(crc & 0xff) ^ buffer[i]]; + } + + return crc ^ (uint64_t)~0; +} + +static void nvme_dif_pract_generate_dif_crc16(NvmeNamespace *ns, uint8_t *buf, + size_t len, uint8_t *mbuf, + size_t mlen, uint16_t apptag, + uint64_t *reftag) { uint8_t *end = buf + len; int16_t pil = 0; if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - nvme_pi_tuple_size(ns); } - trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil, - apptag, *reftag); + trace_pci_nvme_dif_pract_generate_dif_crc16(len, ns->lbasz, + ns->lbasz + pil, apptag, + *reftag); for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); + uint16_t crc = crc16_t10dif(0x0, buf, ns->lbasz); if (pil) { - crc = crc_t10dif(crc, mbuf, pil); + crc = crc16_t10dif(crc, mbuf, pil); } - dif->guard = cpu_to_be16(crc); - dif->apptag = cpu_to_be16(apptag); - dif->reftag = cpu_to_be32(*reftag); + dif->g16.guard = cpu_to_be16(crc); + dif->g16.apptag = cpu_to_be16(apptag); + dif->g16.reftag = cpu_to_be32(*reftag); if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { (*reftag)++; @@ -71,57 +89,114 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, } } -static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, - uint8_t *buf, uint8_t *mbuf, size_t pil, - uint8_t prinfo, uint16_t apptag, - uint16_t appmask, uint32_t reftag) +static void nvme_dif_pract_generate_dif_crc64(NvmeNamespace *ns, uint8_t *buf, + size_t len, uint8_t *mbuf, + size_t mlen, uint16_t apptag, + uint64_t *reftag) +{ + uint8_t *end = buf + len; + int16_t pil = 0; + + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = ns->lbaf.ms - 16; + } + + trace_pci_nvme_dif_pract_generate_dif_crc64(len, ns->lbasz, + ns->lbasz + pil, apptag, + *reftag); + + for (; buf < end; buf += ns->lbasz, mbuf += 
ns->lbaf.ms) { + NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); + uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz); + + if (pil) { + crc = crc64_nvme(crc, mbuf, pil); + } + + dif->g64.guard = cpu_to_be64(crc); + dif->g64.apptag = cpu_to_be16(apptag); + + dif->g64.sr[0] = *reftag >> 40; + dif->g64.sr[1] = *reftag >> 32; + dif->g64.sr[2] = *reftag >> 24; + dif->g64.sr[3] = *reftag >> 16; + dif->g64.sr[4] = *reftag >> 8; + dif->g64.sr[5] = *reftag; + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { + (*reftag)++; + } + } +} + +void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t apptag, + uint64_t *reftag) +{ + switch (ns->pif) { + case NVME_PI_GUARD_16: + return nvme_dif_pract_generate_dif_crc16(ns, buf, len, mbuf, mlen, + apptag, reftag); + case NVME_PI_GUARD_64: + return nvme_dif_pract_generate_dif_crc64(ns, buf, len, mbuf, mlen, + apptag, reftag); + } + + abort(); +} + +static uint16_t nvme_dif_prchk_crc16(NvmeNamespace *ns, NvmeDifTuple *dif, + uint8_t *buf, uint8_t *mbuf, size_t pil, + uint8_t prinfo, uint16_t apptag, + uint16_t appmask, uint64_t reftag) { switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { case NVME_ID_NS_DPS_TYPE_3: - if (be32_to_cpu(dif->reftag) != 0xffffffff) { + if (be32_to_cpu(dif->g16.reftag) != 0xffffffff) { break; } /* fallthrough */ case NVME_ID_NS_DPS_TYPE_1: case NVME_ID_NS_DPS_TYPE_2: - if (be16_to_cpu(dif->apptag) != 0xffff) { + if (be16_to_cpu(dif->g16.apptag) != 0xffff) { break; } - trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag), - be32_to_cpu(dif->reftag)); + trace_pci_nvme_dif_prchk_disabled_crc16(be16_to_cpu(dif->g16.apptag), + be32_to_cpu(dif->g16.reftag)); return NVME_SUCCESS; } if (prinfo & NVME_PRINFO_PRCHK_GUARD) { - uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); + uint16_t crc = crc16_t10dif(0x0, buf, ns->lbasz); if (pil) { - crc = crc_t10dif(crc, mbuf, pil); + crc = crc16_t10dif(crc, mbuf, pil); } - 
trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc); + trace_pci_nvme_dif_prchk_guard_crc16(be16_to_cpu(dif->g16.guard), crc); - if (be16_to_cpu(dif->guard) != crc) { + if (be16_to_cpu(dif->g16.guard) != crc) { return NVME_E2E_GUARD_ERROR; } } if (prinfo & NVME_PRINFO_PRCHK_APP) { - trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag, + trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->g16.apptag), apptag, appmask); - if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) { + if ((be16_to_cpu(dif->g16.apptag) & appmask) != (apptag & appmask)) { return NVME_E2E_APP_ERROR; } } if (prinfo & NVME_PRINFO_PRCHK_REF) { - trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag); + trace_pci_nvme_dif_prchk_reftag_crc16(be32_to_cpu(dif->g16.reftag), + reftag); - if (be32_to_cpu(dif->reftag) != reftag) { + if (be32_to_cpu(dif->g16.reftag) != reftag) { return NVME_E2E_REF_ERROR; } } @@ -129,12 +204,96 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, return NVME_SUCCESS; } +static uint16_t nvme_dif_prchk_crc64(NvmeNamespace *ns, NvmeDifTuple *dif, + uint8_t *buf, uint8_t *mbuf, size_t pil, + uint8_t prinfo, uint16_t apptag, + uint16_t appmask, uint64_t reftag) +{ + uint64_t r = 0; + + r |= (uint64_t)dif->g64.sr[0] << 40; + r |= (uint64_t)dif->g64.sr[1] << 32; + r |= (uint64_t)dif->g64.sr[2] << 24; + r |= (uint64_t)dif->g64.sr[3] << 16; + r |= (uint64_t)dif->g64.sr[4] << 8; + r |= (uint64_t)dif->g64.sr[5]; + + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_3: + if (r != 0xffffffffffff) { + break; + } + + /* fallthrough */ + case NVME_ID_NS_DPS_TYPE_1: + case NVME_ID_NS_DPS_TYPE_2: + if (be16_to_cpu(dif->g64.apptag) != 0xffff) { + break; + } + + trace_pci_nvme_dif_prchk_disabled_crc64(be16_to_cpu(dif->g16.apptag), + r); + + return NVME_SUCCESS; + } + + if (prinfo & NVME_PRINFO_PRCHK_GUARD) { + uint64_t crc = crc64_nvme(~0ULL, buf, ns->lbasz); + + if (pil) { + crc = crc64_nvme(crc, mbuf, pil); 
+ } + + trace_pci_nvme_dif_prchk_guard_crc64(be64_to_cpu(dif->g64.guard), crc); + + if (be64_to_cpu(dif->g64.guard) != crc) { + return NVME_E2E_GUARD_ERROR; + } + } + + if (prinfo & NVME_PRINFO_PRCHK_APP) { + trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->g64.apptag), apptag, + appmask); + + if ((be16_to_cpu(dif->g64.apptag) & appmask) != (apptag & appmask)) { + return NVME_E2E_APP_ERROR; + } + } + + if (prinfo & NVME_PRINFO_PRCHK_REF) { + trace_pci_nvme_dif_prchk_reftag_crc64(r, reftag); + + if (r != reftag) { + return NVME_E2E_REF_ERROR; + } + } + + return NVME_SUCCESS; +} + +static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, + uint8_t *buf, uint8_t *mbuf, size_t pil, + uint8_t prinfo, uint16_t apptag, + uint16_t appmask, uint64_t reftag) +{ + switch (ns->pif) { + case NVME_PI_GUARD_16: + return nvme_dif_prchk_crc16(ns, dif, buf, mbuf, pil, prinfo, apptag, + appmask, reftag); + case NVME_PI_GUARD_64: + return nvme_dif_prchk_crc64(ns, dif, buf, mbuf, pil, prinfo, apptag, + appmask, reftag); + } + + abort(); +} + uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, uint8_t *mbuf, size_t mlen, uint8_t prinfo, uint64_t slba, uint16_t apptag, - uint16_t appmask, uint32_t *reftag) + uint16_t appmask, uint64_t *reftag) { - uint8_t *end = buf + len; + uint8_t *bufp, *end = buf + len; int16_t pil = 0; uint16_t status; @@ -144,18 +303,34 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, } if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - nvme_pi_tuple_size(ns); } trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil); - for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { + for (bufp = buf; bufp < end; bufp += ns->lbasz, mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - - status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, prinfo, apptag, + status = nvme_dif_prchk(ns, dif, bufp, mbuf, pil, prinfo, apptag, appmask, *reftag); 
if (status) { - return status; + /* + * The first block of a 'raw' image is always allocated, so we + * cannot reliably know if the block is all zeroes or not. For + * CRC16 this works fine because the T10 CRC16 is 0x0 for all + * zeroes, but the Rocksoft CRC64 is not. Thus, if a guard error is + * detected for the first block, check if it is zeroed and manually + * set the protection information to all ones to disable protection + * information checking. + */ + if (status == NVME_E2E_GUARD_ERROR && slba == 0x0 && bufp == buf) { + g_autofree uint8_t *zeroes = g_malloc0(ns->lbasz); + + if (memcmp(bufp, zeroes, ns->lbasz) == 0) { + memset(mbuf + pil, 0xff, nvme_pi_tuple_size(ns)); + } + } else { + return status; + } } if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { @@ -183,7 +358,7 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - nvme_pi_tuple_size(ns); } do { @@ -209,7 +384,7 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, end = mbufp + mlen; for (; mbufp < end; mbufp += ns->lbaf.ms) { - memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple)); + memset(mbufp + pil, 0xff, nvme_pi_tuple_size(ns)); } } @@ -251,9 +426,12 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret) uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); + uint64_t reftag = le32_to_cpu(rw->reftag); + uint64_t cdw3 = le32_to_cpu(rw->cdw3); uint16_t status; + reftag |= cdw3 << 32; + trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask, reftag); @@ -283,7 +461,7 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret) goto out; } - if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) { + if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 
nvme_pi_tuple_size(ns)) { goto out; } @@ -367,11 +545,14 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); uint16_t apptag = le16_to_cpu(rw->apptag); uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); + uint64_t reftag = le32_to_cpu(rw->reftag); + uint64_t cdw3 = le32_to_cpu(rw->cdw3); bool pract = !!(prinfo & NVME_PRINFO_PRACT); NvmeBounceContext *ctx; uint16_t status; + reftag |= cdw3 << 32; + trace_pci_nvme_dif_rw(pract, prinfo); ctx = g_new0(NvmeBounceContext, 1); @@ -387,7 +568,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) if (pract) { uint8_t *mbuf, *end; - int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + int16_t pil = ns->lbaf.ms - nvme_pi_tuple_size(ns); status = nvme_check_prinfo(ns, prinfo, slba, reftag); if (status) { @@ -411,8 +592,29 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) for (; mbuf < end; mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - dif->apptag = cpu_to_be16(apptag); - dif->reftag = cpu_to_be32(reftag); + switch (ns->pif) { + case NVME_PI_GUARD_16: + dif->g16.apptag = cpu_to_be16(apptag); + dif->g16.reftag = cpu_to_be32(reftag); + + break; + + case NVME_PI_GUARD_64: + dif->g64.guard = cpu_to_be64(0x6482d367eb22b64e); + dif->g64.apptag = cpu_to_be16(apptag); + + dif->g64.sr[0] = reftag >> 40; + dif->g64.sr[1] = reftag >> 32; + dif->g64.sr[2] = reftag >> 24; + dif->g64.sr[3] = reftag >> 16; + dif->g64.sr[4] = reftag >> 8; + dif->g64.sr[5] = reftag; + + break; + + default: + abort(); + } switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { case NVME_ID_NS_DPS_TYPE_1: @@ -427,7 +629,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } - if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) { + if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) { mapped_len += mlen; } @@ -461,7 +663,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) 
qemu_iovec_init(&ctx->mdata.iov, 1); qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - if (!(pract && ns->lbaf.ms == 8)) { + if (!(pract && ns->lbaf.ms == nvme_pi_tuple_size(ns))) { status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, NVME_TX_DIRECTION_TO_DEVICE, req); if (status) { diff --git a/hw/nvme/dif.h b/hw/nvme/dif.h new file mode 100644 index 0000000000..f12e312250 --- /dev/null +++ b/hw/nvme/dif.h @@ -0,0 +1,191 @@ +#ifndef HW_NVME_DIF_H +#define HW_NVME_DIF_H + +/* from Linux kernel (crypto/crct10dif_common.c) */ +static const uint16_t crc16_t10dif_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 
0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; + +#define CRC64_NVME_POLY 0x9A6C9329AC4BC9B5ULL + +static const uint64_t crc64_nvme_table[] = { + 0x0000000000000000ULL, 0x7F6EF0C830358979ULL, + 0xFEDDE190606B12F2ULL, 0x81B31158505E9B8BULL, + 0xC962E5739841B68FULL, 0xB60C15BBA8743FF6ULL, + 0x37BF04E3F82AA47DULL, 0x48D1F42BC81F2D04ULL, + 0xA61CECB46814FE75ULL, 0xD9721C7C5821770CULL, + 0x58C10D24087FEC87ULL, 0x27AFFDEC384A65FEULL, + 0x6F7E09C7F05548FAULL, 0x1010F90FC060C183ULL, + 0x91A3E857903E5A08ULL, 0xEECD189FA00BD371ULL, + 0x78E0FF3B88BE6F81ULL, 0x078E0FF3B88BE6F8ULL, + 0x863D1EABE8D57D73ULL, 0xF953EE63D8E0F40AULL, + 0xB1821A4810FFD90EULL, 0xCEECEA8020CA5077ULL, + 0x4F5FFBD87094CBFCULL, 0x30310B1040A14285ULL, + 0xDEFC138FE0AA91F4ULL, 0xA192E347D09F188DULL, + 0x2021F21F80C18306ULL, 0x5F4F02D7B0F40A7FULL, + 0x179EF6FC78EB277BULL, 0x68F0063448DEAE02ULL, + 0xE943176C18803589ULL, 0x962DE7A428B5BCF0ULL, + 0xF1C1FE77117CDF02ULL, 0x8EAF0EBF2149567BULL, + 0x0F1C1FE77117CDF0ULL, 0x7072EF2F41224489ULL, + 0x38A31B04893D698DULL, 0x47CDEBCCB908E0F4ULL, + 0xC67EFA94E9567B7FULL, 0xB9100A5CD963F206ULL, + 0x57DD12C379682177ULL, 0x28B3E20B495DA80EULL, + 0xA900F35319033385ULL, 0xD66E039B2936BAFCULL, + 0x9EBFF7B0E12997F8ULL, 0xE1D10778D11C1E81ULL, + 0x606216208142850AULL, 0x1F0CE6E8B1770C73ULL, + 
0x8921014C99C2B083ULL, 0xF64FF184A9F739FAULL, + 0x77FCE0DCF9A9A271ULL, 0x08921014C99C2B08ULL, + 0x4043E43F0183060CULL, 0x3F2D14F731B68F75ULL, + 0xBE9E05AF61E814FEULL, 0xC1F0F56751DD9D87ULL, + 0x2F3DEDF8F1D64EF6ULL, 0x50531D30C1E3C78FULL, + 0xD1E00C6891BD5C04ULL, 0xAE8EFCA0A188D57DULL, + 0xE65F088B6997F879ULL, 0x9931F84359A27100ULL, + 0x1882E91B09FCEA8BULL, 0x67EC19D339C963F2ULL, + 0xD75ADABD7A6E2D6FULL, 0xA8342A754A5BA416ULL, + 0x29873B2D1A053F9DULL, 0x56E9CBE52A30B6E4ULL, + 0x1E383FCEE22F9BE0ULL, 0x6156CF06D21A1299ULL, + 0xE0E5DE5E82448912ULL, 0x9F8B2E96B271006BULL, + 0x71463609127AD31AULL, 0x0E28C6C1224F5A63ULL, + 0x8F9BD7997211C1E8ULL, 0xF0F5275142244891ULL, + 0xB824D37A8A3B6595ULL, 0xC74A23B2BA0EECECULL, + 0x46F932EAEA507767ULL, 0x3997C222DA65FE1EULL, + 0xAFBA2586F2D042EEULL, 0xD0D4D54EC2E5CB97ULL, + 0x5167C41692BB501CULL, 0x2E0934DEA28ED965ULL, + 0x66D8C0F56A91F461ULL, 0x19B6303D5AA47D18ULL, + 0x980521650AFAE693ULL, 0xE76BD1AD3ACF6FEAULL, + 0x09A6C9329AC4BC9BULL, 0x76C839FAAAF135E2ULL, + 0xF77B28A2FAAFAE69ULL, 0x8815D86ACA9A2710ULL, + 0xC0C42C4102850A14ULL, 0xBFAADC8932B0836DULL, + 0x3E19CDD162EE18E6ULL, 0x41773D1952DB919FULL, + 0x269B24CA6B12F26DULL, 0x59F5D4025B277B14ULL, + 0xD846C55A0B79E09FULL, 0xA72835923B4C69E6ULL, + 0xEFF9C1B9F35344E2ULL, 0x90973171C366CD9BULL, + 0x1124202993385610ULL, 0x6E4AD0E1A30DDF69ULL, + 0x8087C87E03060C18ULL, 0xFFE938B633338561ULL, + 0x7E5A29EE636D1EEAULL, 0x0134D92653589793ULL, + 0x49E52D0D9B47BA97ULL, 0x368BDDC5AB7233EEULL, + 0xB738CC9DFB2CA865ULL, 0xC8563C55CB19211CULL, + 0x5E7BDBF1E3AC9DECULL, 0x21152B39D3991495ULL, + 0xA0A63A6183C78F1EULL, 0xDFC8CAA9B3F20667ULL, + 0x97193E827BED2B63ULL, 0xE877CE4A4BD8A21AULL, + 0x69C4DF121B863991ULL, 0x16AA2FDA2BB3B0E8ULL, + 0xF86737458BB86399ULL, 0x8709C78DBB8DEAE0ULL, + 0x06BAD6D5EBD3716BULL, 0x79D4261DDBE6F812ULL, + 0x3105D23613F9D516ULL, 0x4E6B22FE23CC5C6FULL, + 0xCFD833A67392C7E4ULL, 0xB0B6C36E43A74E9DULL, + 0x9A6C9329AC4BC9B5ULL, 0xE50263E19C7E40CCULL, + 0x64B172B9CC20DB47ULL, 
0x1BDF8271FC15523EULL, + 0x530E765A340A7F3AULL, 0x2C608692043FF643ULL, + 0xADD397CA54616DC8ULL, 0xD2BD67026454E4B1ULL, + 0x3C707F9DC45F37C0ULL, 0x431E8F55F46ABEB9ULL, + 0xC2AD9E0DA4342532ULL, 0xBDC36EC59401AC4BULL, + 0xF5129AEE5C1E814FULL, 0x8A7C6A266C2B0836ULL, + 0x0BCF7B7E3C7593BDULL, 0x74A18BB60C401AC4ULL, + 0xE28C6C1224F5A634ULL, 0x9DE29CDA14C02F4DULL, + 0x1C518D82449EB4C6ULL, 0x633F7D4A74AB3DBFULL, + 0x2BEE8961BCB410BBULL, 0x548079A98C8199C2ULL, + 0xD53368F1DCDF0249ULL, 0xAA5D9839ECEA8B30ULL, + 0x449080A64CE15841ULL, 0x3BFE706E7CD4D138ULL, + 0xBA4D61362C8A4AB3ULL, 0xC52391FE1CBFC3CAULL, + 0x8DF265D5D4A0EECEULL, 0xF29C951DE49567B7ULL, + 0x732F8445B4CBFC3CULL, 0x0C41748D84FE7545ULL, + 0x6BAD6D5EBD3716B7ULL, 0x14C39D968D029FCEULL, + 0x95708CCEDD5C0445ULL, 0xEA1E7C06ED698D3CULL, + 0xA2CF882D2576A038ULL, 0xDDA178E515432941ULL, + 0x5C1269BD451DB2CAULL, 0x237C997575283BB3ULL, + 0xCDB181EAD523E8C2ULL, 0xB2DF7122E51661BBULL, + 0x336C607AB548FA30ULL, 0x4C0290B2857D7349ULL, + 0x04D364994D625E4DULL, 0x7BBD94517D57D734ULL, + 0xFA0E85092D094CBFULL, 0x856075C11D3CC5C6ULL, + 0x134D926535897936ULL, 0x6C2362AD05BCF04FULL, + 0xED9073F555E26BC4ULL, 0x92FE833D65D7E2BDULL, + 0xDA2F7716ADC8CFB9ULL, 0xA54187DE9DFD46C0ULL, + 0x24F29686CDA3DD4BULL, 0x5B9C664EFD965432ULL, + 0xB5517ED15D9D8743ULL, 0xCA3F8E196DA80E3AULL, + 0x4B8C9F413DF695B1ULL, 0x34E26F890DC31CC8ULL, + 0x7C339BA2C5DC31CCULL, 0x035D6B6AF5E9B8B5ULL, + 0x82EE7A32A5B7233EULL, 0xFD808AFA9582AA47ULL, + 0x4D364994D625E4DAULL, 0x3258B95CE6106DA3ULL, + 0xB3EBA804B64EF628ULL, 0xCC8558CC867B7F51ULL, + 0x8454ACE74E645255ULL, 0xFB3A5C2F7E51DB2CULL, + 0x7A894D772E0F40A7ULL, 0x05E7BDBF1E3AC9DEULL, + 0xEB2AA520BE311AAFULL, 0x944455E88E0493D6ULL, + 0x15F744B0DE5A085DULL, 0x6A99B478EE6F8124ULL, + 0x224840532670AC20ULL, 0x5D26B09B16452559ULL, + 0xDC95A1C3461BBED2ULL, 0xA3FB510B762E37ABULL, + 0x35D6B6AF5E9B8B5BULL, 0x4AB846676EAE0222ULL, + 0xCB0B573F3EF099A9ULL, 0xB465A7F70EC510D0ULL, + 0xFCB453DCC6DA3DD4ULL, 0x83DAA314F6EFB4ADULL, + 
0x0269B24CA6B12F26ULL, 0x7D0742849684A65FULL, + 0x93CA5A1B368F752EULL, 0xECA4AAD306BAFC57ULL, + 0x6D17BB8B56E467DCULL, 0x12794B4366D1EEA5ULL, + 0x5AA8BF68AECEC3A1ULL, 0x25C64FA09EFB4AD8ULL, + 0xA4755EF8CEA5D153ULL, 0xDB1BAE30FE90582AULL, + 0xBCF7B7E3C7593BD8ULL, 0xC399472BF76CB2A1ULL, + 0x422A5673A732292AULL, 0x3D44A6BB9707A053ULL, + 0x759552905F188D57ULL, 0x0AFBA2586F2D042EULL, + 0x8B48B3003F739FA5ULL, 0xF42643C80F4616DCULL, + 0x1AEB5B57AF4DC5ADULL, 0x6585AB9F9F784CD4ULL, + 0xE436BAC7CF26D75FULL, 0x9B584A0FFF135E26ULL, + 0xD389BE24370C7322ULL, 0xACE74EEC0739FA5BULL, + 0x2D545FB4576761D0ULL, 0x523AAF7C6752E8A9ULL, + 0xC41748D84FE75459ULL, 0xBB79B8107FD2DD20ULL, + 0x3ACAA9482F8C46ABULL, 0x45A459801FB9CFD2ULL, + 0x0D75ADABD7A6E2D6ULL, 0x721B5D63E7936BAFULL, + 0xF3A84C3BB7CDF024ULL, 0x8CC6BCF387F8795DULL, + 0x620BA46C27F3AA2CULL, 0x1D6554A417C62355ULL, + 0x9CD645FC4798B8DEULL, 0xE3B8B53477AD31A7ULL, + 0xAB69411FBFB21CA3ULL, 0xD407B1D78F8795DAULL, + 0x55B4A08FDFD90E51ULL, 0x2ADA5047EFEC8728ULL, +}; + +static inline size_t nvme_pi_tuple_size(NvmeNamespace *ns) +{ + return ns->pif ? 
16 : 8; +} + +uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, + uint64_t reftag); +uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, + uint64_t slba); +void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t apptag, + uint64_t *reftag); +uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint8_t prinfo, + uint64_t slba, uint16_t apptag, + uint16_t appmask, uint64_t *reftag); +uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); + +#endif /* HW_NVME_DIF_H */ diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index ee673f1a5b..8a3613d9ab 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -58,6 +58,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) { static uint64_t ns_count; NvmeIdNs *id_ns = &ns->id_ns; + NvmeIdNsNvm *id_ns_nvm = &ns->id_ns_nvm; uint8_t ds; uint16_t ms; int i; @@ -101,6 +102,8 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) id_ns->dps |= NVME_ID_NS_DPS_FIRST_EIGHT; } + ns->pif = ns->params.pif; + static const NvmeLBAF lbaf[16] = { [0] = { .ds = 9 }, [1] = { .ds = 9, .ms = 8 }, @@ -112,10 +115,11 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) [7] = { .ds = 12, .ms = 64 }, }; + ns->nlbaf = 8; + memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); - id_ns->nlbaf = 7; - for (i = 0; i <= id_ns->nlbaf; i++) { + for (i = 0; i < ns->nlbaf; i++) { NvmeLBAF *lbaf = &id_ns->lbaf[i]; if (lbaf->ds == ds) { if (lbaf->ms == ms) { @@ -126,12 +130,16 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) } /* add non-standard lba format */ - id_ns->nlbaf++; - id_ns->lbaf[id_ns->nlbaf].ds = ds; - id_ns->lbaf[id_ns->nlbaf].ms = ms; - id_ns->flbas |= id_ns->nlbaf; + id_ns->lbaf[ns->nlbaf].ds = ds; + id_ns->lbaf[ns->nlbaf].ms = ms; + ns->nlbaf++; + + id_ns->flbas |= i; + lbaf_found: + id_ns_nvm->elbaf[i] = (ns->pif & 0x3) << 7; + id_ns->nlbaf = ns->nlbaf - 1; nvme_ns_init_format(ns); 
return 0; @@ -370,15 +378,36 @@ static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) { + unsigned int pi_size; + if (!ns->blkconf.blk) { error_setg(errp, "block backend not configured"); return -1; } - if (ns->params.pi && ns->params.ms < 8) { - error_setg(errp, "at least 8 bytes of metadata required to enable " - "protection information"); - return -1; + if (ns->params.pi) { + if (ns->params.pi > NVME_ID_NS_DPS_TYPE_3) { + error_setg(errp, "invalid 'pi' value"); + return -1; + } + + switch (ns->params.pif) { + case NVME_PI_GUARD_16: + pi_size = 8; + break; + case NVME_PI_GUARD_64: + pi_size = 16; + break; + default: + error_setg(errp, "invalid 'pif'"); + return -1; + } + + if (ns->params.ms < pi_size) { + error_setg(errp, "at least %u bytes of metadata required to " + "enable protection information", pi_size); + return -1; + } } if (ns->params.nsid > NVME_MAX_NAMESPACES) { @@ -590,6 +619,7 @@ static Property nvme_ns_props[] = { DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0), DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0), + DEFINE_PROP_UINT8("pif", NvmeNamespace, params.pif, 0), DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128), DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128), DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127), diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 90c0bb7ce2..739c8b8f79 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -102,6 +102,7 @@ typedef struct NvmeNamespaceParams { uint8_t mset; uint8_t pi; uint8_t pil; + uint8_t pif; uint16_t mssrl; uint32_t mcl; @@ -127,12 +128,15 @@ typedef struct NvmeNamespace { int64_t size; int64_t moff; NvmeIdNs id_ns; + NvmeIdNsNvm id_ns_nvm; NvmeLBAF lbaf; + unsigned int nlbaf; size_t lbasz; const uint32_t *iocs; uint8_t csi; uint16_t status; int attached; + uint8_t pif; struct { uint16_t zrwas; @@ -468,7 +472,9 @@ typedef struct 
NvmeCtrl { uint16_t temp_thresh_hi; uint16_t temp_thresh_low; }; - uint32_t async_config; + + uint32_t async_config; + NvmeHostBehaviorSupport hbs; } features; } NvmeCtrl; @@ -513,54 +519,4 @@ void nvme_rw_complete_cb(void *opaque, int ret); uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, NvmeCmd *cmd); -/* from Linux kernel (crypto/crct10dif_common.c) */ -static const uint16_t t10_dif_crc_table[256] = { - 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, - 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, - 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, - 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, - 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, - 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, - 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, - 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, - 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, - 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, - 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, - 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, - 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, - 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, - 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, - 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, - 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, - 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, - 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, - 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, - 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, - 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, - 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, - 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 
0x8D30, - 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, - 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, - 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, - 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, - 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, - 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, - 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, - 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 -}; - -uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, - uint32_t reftag); -uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, - uint64_t slba); -void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t apptag, - uint32_t *reftag); -uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint8_t prinfo, - uint64_t slba, uint16_t apptag, - uint16_t appmask, uint32_t *reftag); -uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); - - #endif /* HW_NVME_INTERNAL_H */ diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index 90730d802f..ff1b458969 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -20,12 +20,16 @@ pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" 
+pci_nvme_dif_pract_generate_dif_crc16(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_pract_generate_dif_crc64(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint64_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx64"" pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16"" -pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" +pci_nvme_dif_prchk_disabled_crc16(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_prchk_disabled_crc64(uint16_t apptag, uint64_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx64"" +pci_nvme_dif_prchk_guard_crc16(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" +pci_nvme_dif_prchk_guard_crc64(uint64_t guard, uint64_t crc) "guard 0x%"PRIx64" crc 0x%"PRIx64"" pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16"" -pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" +pci_nvme_dif_prchk_reftag_crc16(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" +pci_nvme_dif_prchk_reftag_crc64(uint64_t reftag, uint64_t elbrt) "reftag 0x%"PRIx64" elbrt 0x%"PRIx64"" pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8"" pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" pci_nvme_copy_out(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index fbfdf47e26..18b43be7f6 100644 --- a/hw/nvram/spapr_nvram.c +++ 
b/hw/nvram/spapr_nvram.c @@ -219,7 +219,7 @@ static void postload_update_cb(void *opaque, bool running, RunState state) { SpaprNvram *nvram = opaque; - /* This is called after bdrv_invalidate_cache_all. */ + /* This is called after bdrv_activate_all. */ qemu_del_vm_change_state_handler(nvram->vmstate); nvram->vmstate = NULL; diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 4cc204f90d..953fc65fa8 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -27,6 +27,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/datadir.h" +#include "qemu/memalign.h" #include "qapi/error.h" #include "qapi/qapi-events-machine.h" #include "qapi/qapi-events-qdev.h" diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c index 4ee03c83e4..5170a33369 100644 --- a/hw/ppc/spapr_softmmu.c +++ b/hw/ppc/spapr_softmmu.c @@ -1,5 +1,6 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/memalign.h" #include "cpu.h" #include "helper_regs.h" #include "hw/ppc/spapr.h" diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index d2d869aaad..91bb9d21c4 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -42,6 +42,8 @@ config RISCV_VIRT select PFLASH_CFI01 select SERIAL select RISCV_ACLINT + select RISCV_APLIC + select RISCV_IMSIC select SIFIVE_PLIC select SIFIVE_TEST select VIRTIO_MMIO diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c index aec7cfa33f..833624d66c 100644 --- a/hw/riscv/opentitan.c +++ b/hw/riscv/opentitan.c @@ -34,13 +34,15 @@ static const MemMapEntry ibex_memmap[] = { [IBEX_DEV_FLASH] = { 0x20000000, 0x80000 }, [IBEX_DEV_UART] = { 0x40000000, 0x1000 }, [IBEX_DEV_GPIO] = { 0x40040000, 0x1000 }, - [IBEX_DEV_SPI] = { 0x40050000, 0x1000 }, + [IBEX_DEV_SPI_DEVICE] = { 0x40050000, 0x1000 }, [IBEX_DEV_I2C] = { 0x40080000, 0x1000 }, [IBEX_DEV_PATTGEN] = { 0x400e0000, 0x1000 }, [IBEX_DEV_TIMER] = { 0x40100000, 0x1000 }, [IBEX_DEV_SENSOR_CTRL] = { 0x40110000, 0x1000 }, [IBEX_DEV_OTP_CTRL] = { 0x40130000, 0x4000 }, [IBEX_DEV_USBDEV] = { 0x40150000, 0x1000 
}, + [IBEX_DEV_SPI_HOST0] = { 0x40300000, 0x1000 }, + [IBEX_DEV_SPI_HOST1] = { 0x40310000, 0x1000 }, [IBEX_DEV_PWRMGR] = { 0x40400000, 0x1000 }, [IBEX_DEV_RSTMGR] = { 0x40410000, 0x1000 }, [IBEX_DEV_CLKMGR] = { 0x40420000, 0x1000 }, @@ -209,8 +211,12 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) create_unimplemented_device("riscv.lowrisc.ibex.gpio", memmap[IBEX_DEV_GPIO].base, memmap[IBEX_DEV_GPIO].size); - create_unimplemented_device("riscv.lowrisc.ibex.spi", - memmap[IBEX_DEV_SPI].base, memmap[IBEX_DEV_SPI].size); + create_unimplemented_device("riscv.lowrisc.ibex.spi_device", + memmap[IBEX_DEV_SPI_DEVICE].base, memmap[IBEX_DEV_SPI_DEVICE].size); + create_unimplemented_device("riscv.lowrisc.ibex.spi_host0", + memmap[IBEX_DEV_SPI_HOST0].base, memmap[IBEX_DEV_SPI_HOST0].size); + create_unimplemented_device("riscv.lowrisc.ibex.spi_host1", + memmap[IBEX_DEV_SPI_HOST1].base, memmap[IBEX_DEV_SPI_HOST1].size); create_unimplemented_device("riscv.lowrisc.ibex.i2c", memmap[IBEX_DEV_I2C].base, memmap[IBEX_DEV_I2C].size); create_unimplemented_device("riscv.lowrisc.ibex.pattgen", diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index e3068d6126..da50cbed43 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -33,6 +33,8 @@ #include "hw/riscv/boot.h" #include "hw/riscv/numa.h" #include "hw/intc/riscv_aclint.h" +#include "hw/intc/riscv_aplic.h" +#include "hw/intc/riscv_imsic.h" #include "hw/intc/sifive_plic.h" #include "hw/misc/sifive_test.h" #include "chardev/char.h" @@ -43,6 +45,28 @@ #include "hw/pci-host/gpex.h" #include "hw/display/ramfb.h" +/* + * The virt machine physical address space used by some of the devices + * namely ACLINT, PLIC, APLIC, and IMSIC depend on number of Sockets, + * number of CPUs, and number of IMSIC guest files. + * + * Various limits defined by VIRT_SOCKETS_MAX_BITS, VIRT_CPUS_MAX_BITS, + * and VIRT_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization + * of virt machine physical address space. 
+ */ + +#define VIRT_IMSIC_GROUP_MAX_SIZE (1U << IMSIC_MMIO_GROUP_MIN_SHIFT) +#if VIRT_IMSIC_GROUP_MAX_SIZE < \ + IMSIC_GROUP_SIZE(VIRT_CPUS_MAX_BITS, VIRT_IRQCHIP_MAX_GUESTS_BITS) +#error "Can't accomodate single IMSIC group in address space" +#endif + +#define VIRT_IMSIC_MAX_SIZE (VIRT_SOCKETS_MAX * \ + VIRT_IMSIC_GROUP_MAX_SIZE) +#if 0x4000000 < VIRT_IMSIC_MAX_SIZE +#error "Can't accomodate all IMSIC groups in address space" +#endif + static const MemMapEntry virt_memmap[] = { [VIRT_DEBUG] = { 0x0, 0x100 }, [VIRT_MROM] = { 0x1000, 0xf000 }, @@ -52,10 +76,14 @@ static const MemMapEntry virt_memmap[] = { [VIRT_ACLINT_SSWI] = { 0x2F00000, 0x4000 }, [VIRT_PCIE_PIO] = { 0x3000000, 0x10000 }, [VIRT_PLIC] = { 0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) }, + [VIRT_APLIC_M] = { 0xc000000, APLIC_SIZE(VIRT_CPUS_MAX) }, + [VIRT_APLIC_S] = { 0xd000000, APLIC_SIZE(VIRT_CPUS_MAX) }, [VIRT_UART0] = { 0x10000000, 0x100 }, [VIRT_VIRTIO] = { 0x10001000, 0x1000 }, [VIRT_FW_CFG] = { 0x10100000, 0x18 }, [VIRT_FLASH] = { 0x20000000, 0x4000000 }, + [VIRT_IMSIC_M] = { 0x24000000, VIRT_IMSIC_MAX_SIZE }, + [VIRT_IMSIC_S] = { 0x28000000, VIRT_IMSIC_MAX_SIZE }, [VIRT_PCIE_ECAM] = { 0x30000000, 0x10000000 }, [VIRT_PCIE_MMIO] = { 0x40000000, 0x40000000 }, [VIRT_DRAM] = { 0x80000000, 0x0 }, @@ -133,12 +161,13 @@ static void virt_flash_map(RISCVVirtState *s, sysmem); } -static void create_pcie_irq_map(void *fdt, char *nodename, - uint32_t plic_phandle) +static void create_pcie_irq_map(RISCVVirtState *s, void *fdt, char *nodename, + uint32_t irqchip_phandle) { int pin, dev; - uint32_t - full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS * FDT_INT_MAP_WIDTH] = {}; + uint32_t irq_map_stride = 0; + uint32_t full_irq_map[GPEX_NUM_IRQS * GPEX_NUM_IRQS * + FDT_MAX_INT_MAP_WIDTH] = {}; uint32_t *irq_map = full_irq_map; /* This code creates a standard swizzle of interrupts such that @@ -156,23 +185,31 @@ static void create_pcie_irq_map(void *fdt, char *nodename, int irq_nr = PCIE_IRQ + ((pin + PCI_SLOT(devfn)) 
% GPEX_NUM_IRQS); int i = 0; + /* Fill PCI address cells */ irq_map[i] = cpu_to_be32(devfn << 8); - i += FDT_PCI_ADDR_CELLS; - irq_map[i] = cpu_to_be32(pin + 1); + /* Fill PCI Interrupt cells */ + irq_map[i] = cpu_to_be32(pin + 1); i += FDT_PCI_INT_CELLS; - irq_map[i++] = cpu_to_be32(plic_phandle); - i += FDT_PLIC_ADDR_CELLS; - irq_map[i] = cpu_to_be32(irq_nr); + /* Fill interrupt controller phandle and cells */ + irq_map[i++] = cpu_to_be32(irqchip_phandle); + irq_map[i++] = cpu_to_be32(irq_nr); + if (s->aia_type != VIRT_AIA_TYPE_NONE) { + irq_map[i++] = cpu_to_be32(0x4); + } - irq_map += FDT_INT_MAP_WIDTH; + if (!irq_map_stride) { + irq_map_stride = i; + } + irq_map += irq_map_stride; } } - qemu_fdt_setprop(fdt, nodename, "interrupt-map", - full_irq_map, sizeof(full_irq_map)); + qemu_fdt_setprop(fdt, nodename, "interrupt-map", full_irq_map, + GPEX_NUM_IRQS * GPEX_NUM_IRQS * + irq_map_stride * sizeof(uint32_t)); qemu_fdt_setprop_cells(fdt, nodename, "interrupt-map-mask", 0x1800, 0, 0, 0x7); @@ -298,7 +335,7 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, { int cpu; char *name; - unsigned long addr; + unsigned long addr, size; uint32_t aclint_cells_size; uint32_t *aclint_mswi_cells; uint32_t *aclint_sswi_cells; @@ -319,29 +356,38 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, } aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2; - addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket); - name = g_strdup_printf("/soc/mswi@%lx", addr); - qemu_fdt_add_subnode(mc->fdt, name); - qemu_fdt_setprop_string(mc->fdt, name, "compatible", "riscv,aclint-mswi"); - qemu_fdt_setprop_cells(mc->fdt, name, "reg", - 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE); - qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", - aclint_mswi_cells, aclint_cells_size); - qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0); - qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0); - riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); 
- g_free(name); + if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) { + addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket); + name = g_strdup_printf("/soc/mswi@%lx", addr); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", + "riscv,aclint-mswi"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE); + qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", + aclint_mswi_cells, aclint_cells_size); + qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0); + riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); + g_free(name); + } - addr = memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE + - (memmap[VIRT_CLINT].size * socket); + if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { + addr = memmap[VIRT_CLINT].base + + (RISCV_ACLINT_DEFAULT_MTIMER_SIZE * socket); + size = RISCV_ACLINT_DEFAULT_MTIMER_SIZE; + } else { + addr = memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE + + (memmap[VIRT_CLINT].size * socket); + size = memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE; + } name = g_strdup_printf("/soc/mtimer@%lx", addr); qemu_fdt_add_subnode(mc->fdt, name); qemu_fdt_setprop_string(mc->fdt, name, "compatible", "riscv,aclint-mtimer"); qemu_fdt_setprop_cells(mc->fdt, name, "reg", 0x0, addr + RISCV_ACLINT_DEFAULT_MTIME, - 0x0, memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE - - RISCV_ACLINT_DEFAULT_MTIME, + 0x0, size - RISCV_ACLINT_DEFAULT_MTIME, 0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, 0x0, RISCV_ACLINT_DEFAULT_MTIME); qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", @@ -349,19 +395,22 @@ static void create_fdt_socket_aclint(RISCVVirtState *s, riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); g_free(name); - addr = memmap[VIRT_ACLINT_SSWI].base + - (memmap[VIRT_ACLINT_SSWI].size * socket); - name = g_strdup_printf("/soc/sswi@%lx", addr); - qemu_fdt_add_subnode(mc->fdt, name); - 
qemu_fdt_setprop_string(mc->fdt, name, "compatible", "riscv,aclint-sswi"); - qemu_fdt_setprop_cells(mc->fdt, name, "reg", - 0x0, addr, 0x0, memmap[VIRT_ACLINT_SSWI].size); - qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", - aclint_sswi_cells, aclint_cells_size); - qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0); - qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0); - riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); - g_free(name); + if (s->aia_type != VIRT_AIA_TYPE_APLIC_IMSIC) { + addr = memmap[VIRT_ACLINT_SSWI].base + + (memmap[VIRT_ACLINT_SSWI].size * socket); + name = g_strdup_printf("/soc/sswi@%lx", addr); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", + "riscv,aclint-sswi"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, addr, 0x0, memmap[VIRT_ACLINT_SSWI].size); + qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", + aclint_sswi_cells, aclint_cells_size); + qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0); + riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); + g_free(name); + } g_free(aclint_mswi_cells); g_free(aclint_mtimer_cells); @@ -405,8 +454,6 @@ static void create_fdt_socket_plic(RISCVVirtState *s, plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr); qemu_fdt_add_subnode(mc->fdt, plic_name); qemu_fdt_setprop_cell(mc->fdt, plic_name, - "#address-cells", FDT_PLIC_ADDR_CELLS); - qemu_fdt_setprop_cell(mc->fdt, plic_name, "#interrupt-cells", FDT_PLIC_INT_CELLS); qemu_fdt_setprop_string_array(mc->fdt, plic_name, "compatible", (char **)&plic_compat, @@ -425,17 +472,233 @@ static void create_fdt_socket_plic(RISCVVirtState *s, g_free(plic_cells); } +static uint32_t imsic_num_bits(uint32_t count) +{ + uint32_t ret = 0; + + while (BIT(ret) < count) { + ret++; + } + + return ret; +} + +static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t *phandle, 
uint32_t *intc_phandles, + uint32_t *msi_m_phandle, uint32_t *msi_s_phandle) +{ + int cpu, socket; + char *imsic_name; + MachineState *mc = MACHINE(s); + uint32_t imsic_max_hart_per_socket, imsic_guest_bits; + uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size; + + *msi_m_phandle = (*phandle)++; + *msi_s_phandle = (*phandle)++; + imsic_cells = g_new0(uint32_t, mc->smp.cpus * 2); + imsic_regs = g_new0(uint32_t, riscv_socket_count(mc) * 4); + + /* M-level IMSIC node */ + for (cpu = 0; cpu < mc->smp.cpus; cpu++) { + imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT); + } + imsic_max_hart_per_socket = 0; + for (socket = 0; socket < riscv_socket_count(mc); socket++) { + imsic_addr = memmap[VIRT_IMSIC_M].base + + socket * VIRT_IMSIC_GROUP_MAX_SIZE; + imsic_size = IMSIC_HART_SIZE(0) * s->soc[socket].num_harts; + imsic_regs[socket * 4 + 0] = 0; + imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr); + imsic_regs[socket * 4 + 2] = 0; + imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size); + if (imsic_max_hart_per_socket < s->soc[socket].num_harts) { + imsic_max_hart_per_socket = s->soc[socket].num_harts; + } + } + imsic_name = g_strdup_printf("/soc/imsics@%lx", + (unsigned long)memmap[VIRT_IMSIC_M].base); + qemu_fdt_add_subnode(mc->fdt, imsic_name); + qemu_fdt_setprop_string(mc->fdt, imsic_name, "compatible", + "riscv,imsics"); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "#interrupt-cells", + FDT_IMSIC_INT_CELLS); + qemu_fdt_setprop(mc->fdt, imsic_name, "interrupt-controller", + NULL, 0); + qemu_fdt_setprop(mc->fdt, imsic_name, "msi-controller", + NULL, 0); + qemu_fdt_setprop(mc->fdt, imsic_name, "interrupts-extended", + imsic_cells, mc->smp.cpus * sizeof(uint32_t) * 2); + qemu_fdt_setprop(mc->fdt, imsic_name, "reg", imsic_regs, + riscv_socket_count(mc) * sizeof(uint32_t) * 4); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,num-ids", + VIRT_IRQCHIP_NUM_MSIS); + qemu_fdt_setprop_cells(mc->fdt, 
imsic_name, "riscv,ipi-id", + VIRT_IRQCHIP_IPI_MSI); + if (riscv_socket_count(mc) > 1) { + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,hart-index-bits", + imsic_num_bits(imsic_max_hart_per_socket)); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,group-index-bits", + imsic_num_bits(riscv_socket_count(mc))); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,group-index-shift", + IMSIC_MMIO_GROUP_MIN_SHIFT); + } + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "phandle", *msi_m_phandle); + g_free(imsic_name); + + /* S-level IMSIC node */ + for (cpu = 0; cpu < mc->smp.cpus; cpu++) { + imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); + } + imsic_guest_bits = imsic_num_bits(s->aia_guests + 1); + imsic_max_hart_per_socket = 0; + for (socket = 0; socket < riscv_socket_count(mc); socket++) { + imsic_addr = memmap[VIRT_IMSIC_S].base + + socket * VIRT_IMSIC_GROUP_MAX_SIZE; + imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) * + s->soc[socket].num_harts; + imsic_regs[socket * 4 + 0] = 0; + imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr); + imsic_regs[socket * 4 + 2] = 0; + imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size); + if (imsic_max_hart_per_socket < s->soc[socket].num_harts) { + imsic_max_hart_per_socket = s->soc[socket].num_harts; + } + } + imsic_name = g_strdup_printf("/soc/imsics@%lx", + (unsigned long)memmap[VIRT_IMSIC_S].base); + qemu_fdt_add_subnode(mc->fdt, imsic_name); + qemu_fdt_setprop_string(mc->fdt, imsic_name, "compatible", + "riscv,imsics"); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "#interrupt-cells", + FDT_IMSIC_INT_CELLS); + qemu_fdt_setprop(mc->fdt, imsic_name, "interrupt-controller", + NULL, 0); + qemu_fdt_setprop(mc->fdt, imsic_name, "msi-controller", + NULL, 0); + qemu_fdt_setprop(mc->fdt, imsic_name, "interrupts-extended", + imsic_cells, mc->smp.cpus * sizeof(uint32_t) * 2); + qemu_fdt_setprop(mc->fdt, imsic_name, "reg", imsic_regs, + riscv_socket_count(mc) * 
sizeof(uint32_t) * 4); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,num-ids", + VIRT_IRQCHIP_NUM_MSIS); + qemu_fdt_setprop_cells(mc->fdt, imsic_name, "riscv,ipi-id", + VIRT_IRQCHIP_IPI_MSI); + if (imsic_guest_bits) { + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,guest-index-bits", + imsic_guest_bits); + } + if (riscv_socket_count(mc) > 1) { + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,hart-index-bits", + imsic_num_bits(imsic_max_hart_per_socket)); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,group-index-bits", + imsic_num_bits(riscv_socket_count(mc))); + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "riscv,group-index-shift", + IMSIC_MMIO_GROUP_MIN_SHIFT); + } + qemu_fdt_setprop_cell(mc->fdt, imsic_name, "phandle", *msi_s_phandle); + g_free(imsic_name); + + g_free(imsic_regs); + g_free(imsic_cells); +} + +static void create_fdt_socket_aplic(RISCVVirtState *s, + const MemMapEntry *memmap, int socket, + uint32_t msi_m_phandle, + uint32_t msi_s_phandle, + uint32_t *phandle, + uint32_t *intc_phandles, + uint32_t *aplic_phandles) +{ + int cpu; + char *aplic_name; + uint32_t *aplic_cells; + unsigned long aplic_addr; + MachineState *mc = MACHINE(s); + uint32_t aplic_m_phandle, aplic_s_phandle; + + aplic_m_phandle = (*phandle)++; + aplic_s_phandle = (*phandle)++; + aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); + + /* M-level APLIC node */ + for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT); + } + aplic_addr = memmap[VIRT_APLIC_M].base + + (memmap[VIRT_APLIC_M].size * socket); + aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); + qemu_fdt_add_subnode(mc->fdt, aplic_name); + qemu_fdt_setprop_string(mc->fdt, aplic_name, "compatible", "riscv,aplic"); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, + "#interrupt-cells", FDT_APLIC_INT_CELLS); + qemu_fdt_setprop(mc->fdt, aplic_name, "interrupt-controller", NULL, 0); 
+ if (s->aia_type == VIRT_AIA_TYPE_APLIC) { + qemu_fdt_setprop(mc->fdt, aplic_name, "interrupts-extended", + aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2); + } else { + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "msi-parent", + msi_m_phandle); + } + qemu_fdt_setprop_cells(mc->fdt, aplic_name, "reg", + 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_M].size); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "riscv,num-sources", + VIRT_IRQCHIP_NUM_SOURCES); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "riscv,children", + aplic_s_phandle); + qemu_fdt_setprop_cells(mc->fdt, aplic_name, "riscv,delegate", + aplic_s_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES); + riscv_socket_fdt_write_id(mc, mc->fdt, aplic_name, socket); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "phandle", aplic_m_phandle); + g_free(aplic_name); + + /* S-level APLIC node */ + for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); + } + aplic_addr = memmap[VIRT_APLIC_S].base + + (memmap[VIRT_APLIC_S].size * socket); + aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); + qemu_fdt_add_subnode(mc->fdt, aplic_name); + qemu_fdt_setprop_string(mc->fdt, aplic_name, "compatible", "riscv,aplic"); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, + "#interrupt-cells", FDT_APLIC_INT_CELLS); + qemu_fdt_setprop(mc->fdt, aplic_name, "interrupt-controller", NULL, 0); + if (s->aia_type == VIRT_AIA_TYPE_APLIC) { + qemu_fdt_setprop(mc->fdt, aplic_name, "interrupts-extended", + aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2); + } else { + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "msi-parent", + msi_s_phandle); + } + qemu_fdt_setprop_cells(mc->fdt, aplic_name, "reg", + 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_S].size); + qemu_fdt_setprop_cell(mc->fdt, aplic_name, "riscv,num-sources", + VIRT_IRQCHIP_NUM_SOURCES); + riscv_socket_fdt_write_id(mc, mc->fdt, aplic_name, socket); + 
qemu_fdt_setprop_cell(mc->fdt, aplic_name, "phandle", aplic_s_phandle); + g_free(aplic_name); + + g_free(aplic_cells); + aplic_phandles[socket] = aplic_s_phandle; +} + static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, bool is_32_bit, uint32_t *phandle, uint32_t *irq_mmio_phandle, uint32_t *irq_pcie_phandle, - uint32_t *irq_virtio_phandle) + uint32_t *irq_virtio_phandle, + uint32_t *msi_pcie_phandle) { - int socket; char *clust_name; - uint32_t *intc_phandles; + int socket, phandle_pos; MachineState *mc = MACHINE(s); - uint32_t xplic_phandles[MAX_NODES]; + uint32_t msi_m_phandle = 0, msi_s_phandle = 0; + uint32_t *intc_phandles, xplic_phandles[MAX_NODES]; qemu_fdt_add_subnode(mc->fdt, "/cpus"); qemu_fdt_setprop_cell(mc->fdt, "/cpus", "timebase-frequency", @@ -444,32 +707,55 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, qemu_fdt_setprop_cell(mc->fdt, "/cpus", "#address-cells", 0x1); qemu_fdt_add_subnode(mc->fdt, "/cpus/cpu-map"); + intc_phandles = g_new0(uint32_t, mc->smp.cpus); + + phandle_pos = mc->smp.cpus; for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) { + phandle_pos -= s->soc[socket].num_harts; + clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket); qemu_fdt_add_subnode(mc->fdt, clust_name); - intc_phandles = g_new0(uint32_t, s->soc[socket].num_harts); - create_fdt_socket_cpus(s, socket, clust_name, phandle, - is_32_bit, intc_phandles); + is_32_bit, &intc_phandles[phandle_pos]); create_fdt_socket_memory(s, memmap, socket); + g_free(clust_name); + if (!kvm_enabled()) { if (s->have_aclint) { - create_fdt_socket_aclint(s, memmap, socket, intc_phandles); + create_fdt_socket_aclint(s, memmap, socket, + &intc_phandles[phandle_pos]); } else { - create_fdt_socket_clint(s, memmap, socket, intc_phandles); + create_fdt_socket_clint(s, memmap, socket, + &intc_phandles[phandle_pos]); } } + } - create_fdt_socket_plic(s, memmap, socket, phandle, - intc_phandles, xplic_phandles); + if 
(s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { + create_fdt_imsic(s, memmap, phandle, intc_phandles, + &msi_m_phandle, &msi_s_phandle); + *msi_pcie_phandle = msi_s_phandle; + } - g_free(intc_phandles); - g_free(clust_name); + phandle_pos = mc->smp.cpus; + for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) { + phandle_pos -= s->soc[socket].num_harts; + + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + create_fdt_socket_plic(s, memmap, socket, phandle, + &intc_phandles[phandle_pos], xplic_phandles); + } else { + create_fdt_socket_aplic(s, memmap, socket, + msi_m_phandle, msi_s_phandle, phandle, + &intc_phandles[phandle_pos], xplic_phandles); + } } + g_free(intc_phandles); + for (socket = 0; socket < riscv_socket_count(mc); socket++) { if (socket == 0) { *irq_mmio_phandle = xplic_phandles[socket]; @@ -505,13 +791,20 @@ static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap, 0x0, memmap[VIRT_VIRTIO].size); qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", irq_virtio_phandle); - qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", VIRTIO_IRQ + i); + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", + VIRTIO_IRQ + i); + } else { + qemu_fdt_setprop_cells(mc->fdt, name, "interrupts", + VIRTIO_IRQ + i, 0x4); + } g_free(name); } } static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, - uint32_t irq_pcie_phandle) + uint32_t irq_pcie_phandle, + uint32_t msi_pcie_phandle) { char *name; MachineState *mc = MACHINE(s); @@ -531,6 +824,9 @@ static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, qemu_fdt_setprop_cells(mc->fdt, name, "bus-range", 0, memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1); qemu_fdt_setprop(mc->fdt, name, "dma-coherent", NULL, 0); + if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { + qemu_fdt_setprop_cell(mc->fdt, name, "msi-parent", msi_pcie_phandle); + } qemu_fdt_setprop_cells(mc->fdt, name, "reg", 0, memmap[VIRT_PCIE_ECAM].base, 0, 
memmap[VIRT_PCIE_ECAM].size); qemu_fdt_setprop_sized_cells(mc->fdt, name, "ranges", @@ -543,7 +839,7 @@ static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.size); - create_pcie_irq_map(mc->fdt, name, irq_pcie_phandle); + create_pcie_irq_map(s, mc->fdt, name, irq_pcie_phandle); g_free(name); } @@ -602,7 +898,11 @@ static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap, 0x0, memmap[VIRT_UART0].size); qemu_fdt_setprop_cell(mc->fdt, name, "clock-frequency", 3686400); qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", irq_mmio_phandle); - qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", UART0_IRQ); + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", UART0_IRQ); + } else { + qemu_fdt_setprop_cells(mc->fdt, name, "interrupts", UART0_IRQ, 0x4); + } qemu_fdt_add_subnode(mc->fdt, "/chosen"); qemu_fdt_setprop_string(mc->fdt, "/chosen", "stdout-path", name); @@ -623,7 +923,11 @@ static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap, 0x0, memmap[VIRT_RTC].base, 0x0, memmap[VIRT_RTC].size); qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", irq_mmio_phandle); - qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", RTC_IRQ); + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", RTC_IRQ); + } else { + qemu_fdt_setprop_cells(mc->fdt, name, "interrupts", RTC_IRQ, 0x4); + } g_free(name); } @@ -648,7 +952,7 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, uint64_t mem_size, const char *cmdline, bool is_32_bit) { MachineState *mc = MACHINE(s); - uint32_t phandle = 1, irq_mmio_phandle = 1; + uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1; uint32_t irq_pcie_phandle = 1, irq_virtio_phandle = 1; if (mc->dtb) { @@ -678,11 +982,12 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, 
qemu_fdt_setprop_cell(mc->fdt, "/soc", "#address-cells", 0x2); create_fdt_sockets(s, memmap, is_32_bit, &phandle, - &irq_mmio_phandle, &irq_pcie_phandle, &irq_virtio_phandle); + &irq_mmio_phandle, &irq_pcie_phandle, &irq_virtio_phandle, + &msi_pcie_phandle); create_fdt_virtio(s, memmap, irq_virtio_phandle); - create_fdt_pcie(s, memmap, irq_pcie_phandle); + create_fdt_pcie(s, memmap, irq_pcie_phandle, msi_pcie_phandle); create_fdt_reset(s, memmap, &phandle); @@ -704,7 +1009,7 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, hwaddr high_mmio_base, hwaddr high_mmio_size, hwaddr pio_base, - DeviceState *plic) + DeviceState *irqchip) { DeviceState *dev; MemoryRegion *ecam_alias, *ecam_reg; @@ -738,7 +1043,7 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem, sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, pio_base); for (i = 0; i < GPEX_NUM_IRQS; i++) { - irq = qdev_get_gpio_in(plic, PCIE_IRQ + i); + irq = qdev_get_gpio_in(irqchip, PCIE_IRQ + i); sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq); gpex_set_irq_num(GPEX_HOST(dev), i, PCIE_IRQ + i); @@ -769,18 +1074,100 @@ static FWCfgState *create_fw_cfg(const MachineState *mc) return fw_cfg; } +static DeviceState *virt_create_plic(const MemMapEntry *memmap, int socket, + int base_hartid, int hart_count) +{ + DeviceState *ret; + char *plic_hart_config; + + /* Per-socket PLIC hart topology configuration string */ + plic_hart_config = riscv_plic_hart_config_string(hart_count); + + /* Per-socket PLIC */ + ret = sifive_plic_create( + memmap[VIRT_PLIC].base + socket * memmap[VIRT_PLIC].size, + plic_hart_config, hart_count, base_hartid, + VIRT_IRQCHIP_NUM_SOURCES, + ((1U << VIRT_IRQCHIP_NUM_PRIO_BITS) - 1), + VIRT_PLIC_PRIORITY_BASE, + VIRT_PLIC_PENDING_BASE, + VIRT_PLIC_ENABLE_BASE, + VIRT_PLIC_ENABLE_STRIDE, + VIRT_PLIC_CONTEXT_BASE, + VIRT_PLIC_CONTEXT_STRIDE, + memmap[VIRT_PLIC].size); + + g_free(plic_hart_config); + + return ret; +} + +static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, 
int aia_guests, + const MemMapEntry *memmap, int socket, + int base_hartid, int hart_count) +{ + int i; + hwaddr addr; + uint32_t guest_bits; + DeviceState *aplic_m; + bool msimode = (aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) ? true : false; + + if (msimode) { + /* Per-socket M-level IMSICs */ + addr = memmap[VIRT_IMSIC_M].base + socket * VIRT_IMSIC_GROUP_MAX_SIZE; + for (i = 0; i < hart_count; i++) { + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), + base_hartid + i, true, 1, + VIRT_IRQCHIP_NUM_MSIS); + } + + /* Per-socket S-level IMSICs */ + guest_bits = imsic_num_bits(aia_guests + 1); + addr = memmap[VIRT_IMSIC_S].base + socket * VIRT_IMSIC_GROUP_MAX_SIZE; + for (i = 0; i < hart_count; i++) { + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(guest_bits), + base_hartid + i, false, 1 + aia_guests, + VIRT_IRQCHIP_NUM_MSIS); + } + } + + /* Per-socket M-level APLIC */ + aplic_m = riscv_aplic_create( + memmap[VIRT_APLIC_M].base + socket * memmap[VIRT_APLIC_M].size, + memmap[VIRT_APLIC_M].size, + (msimode) ? 0 : base_hartid, + (msimode) ? 0 : hart_count, + VIRT_IRQCHIP_NUM_SOURCES, + VIRT_IRQCHIP_NUM_PRIO_BITS, + msimode, true, NULL); + + if (aplic_m) { + /* Per-socket S-level APLIC */ + riscv_aplic_create( + memmap[VIRT_APLIC_S].base + socket * memmap[VIRT_APLIC_S].size, + memmap[VIRT_APLIC_S].size, + (msimode) ? 0 : base_hartid, + (msimode) ? 
0 : hart_count, + VIRT_IRQCHIP_NUM_SOURCES, + VIRT_IRQCHIP_NUM_PRIO_BITS, + msimode, false, aplic_m); + } + + return aplic_m; +} + static void virt_machine_init(MachineState *machine) { const MemMapEntry *memmap = virt_memmap; RISCVVirtState *s = RISCV_VIRT_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); - char *plic_hart_config, *soc_name; + char *soc_name; target_ulong start_addr = memmap[VIRT_DRAM].base; target_ulong firmware_end_addr, kernel_start_addr; uint32_t fdt_load_addr; uint64_t kernel_entry; - DeviceState *mmio_plic, *virtio_plic, *pcie_plic; + DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip; int i, base_hartid, hart_count; /* Check socket count limit */ @@ -791,7 +1178,7 @@ static void virt_machine_init(MachineState *machine) } /* Initialize sockets */ - mmio_plic = virtio_plic = pcie_plic = NULL; + mmio_irqchip = virtio_irqchip = pcie_irqchip = NULL; for (i = 0; i < riscv_socket_count(machine); i++) { if (!riscv_socket_check_hartids(machine, i)) { error_report("discontinuous hartids in socket%d", i); @@ -823,56 +1210,68 @@ static void virt_machine_init(MachineState *machine) sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_abort); if (!kvm_enabled()) { - /* Per-socket CLINT */ - riscv_aclint_swi_create( - memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size, - base_hartid, hart_count, false); - riscv_aclint_mtimer_create( - memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size + - RISCV_ACLINT_SWI_SIZE, - RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, - RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, - RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); - - /* Per-socket ACLINT SSWI */ if (s->have_aclint) { + if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) { + /* Per-socket ACLINT MTIMER */ + riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base + + i * RISCV_ACLINT_DEFAULT_MTIMER_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, + base_hartid, hart_count, + 
RISCV_ACLINT_DEFAULT_MTIMECMP, + RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); + } else { + /* Per-socket ACLINT MSWI, MTIMER, and SSWI */ + riscv_aclint_swi_create(memmap[VIRT_CLINT].base + + i * memmap[VIRT_CLINT].size, + base_hartid, hart_count, false); + riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base + + i * memmap[VIRT_CLINT].size + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, + base_hartid, hart_count, + RISCV_ACLINT_DEFAULT_MTIMECMP, + RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); + riscv_aclint_swi_create(memmap[VIRT_ACLINT_SSWI].base + + i * memmap[VIRT_ACLINT_SSWI].size, + base_hartid, hart_count, true); + } + } else { + /* Per-socket SiFive CLINT */ riscv_aclint_swi_create( - memmap[VIRT_ACLINT_SSWI].base + - i * memmap[VIRT_ACLINT_SSWI].size, - base_hartid, hart_count, true); + memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size, + base_hartid, hart_count, false); + riscv_aclint_mtimer_create(memmap[VIRT_CLINT].base + + i * memmap[VIRT_CLINT].size + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, + RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); } } - /* Per-socket PLIC hart topology configuration string */ - plic_hart_config = riscv_plic_hart_config_string(hart_count); - - /* Per-socket PLIC */ - s->plic[i] = sifive_plic_create( - memmap[VIRT_PLIC].base + i * memmap[VIRT_PLIC].size, - plic_hart_config, hart_count, base_hartid, - VIRT_PLIC_NUM_SOURCES, - VIRT_PLIC_NUM_PRIORITIES, - VIRT_PLIC_PRIORITY_BASE, - VIRT_PLIC_PENDING_BASE, - VIRT_PLIC_ENABLE_BASE, - VIRT_PLIC_ENABLE_STRIDE, - VIRT_PLIC_CONTEXT_BASE, - VIRT_PLIC_CONTEXT_STRIDE, - memmap[VIRT_PLIC].size); - g_free(plic_hart_config); + /* Per-socket interrupt controller */ + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + s->irqchip[i] = virt_create_plic(memmap, i, + base_hartid, hart_count); + } else { + s->irqchip[i] = 
virt_create_aia(s->aia_type, s->aia_guests, + memmap, i, base_hartid, + hart_count); + } - /* Try to use different PLIC instance based device type */ + /* Try to use different IRQCHIP instance based device type */ if (i == 0) { - mmio_plic = s->plic[i]; - virtio_plic = s->plic[i]; - pcie_plic = s->plic[i]; + mmio_irqchip = s->irqchip[i]; + virtio_irqchip = s->irqchip[i]; + pcie_irqchip = s->irqchip[i]; } if (i == 1) { - virtio_plic = s->plic[i]; - pcie_plic = s->plic[i]; + virtio_irqchip = s->irqchip[i]; + pcie_irqchip = s->irqchip[i]; } if (i == 2) { - pcie_plic = s->plic[i]; + pcie_irqchip = s->irqchip[i]; } } @@ -990,7 +1389,7 @@ static void virt_machine_init(MachineState *machine) for (i = 0; i < VIRTIO_COUNT; i++) { sysbus_create_simple("virtio-mmio", memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size, - qdev_get_gpio_in(DEVICE(virtio_plic), VIRTIO_IRQ + i)); + qdev_get_gpio_in(DEVICE(virtio_irqchip), VIRTIO_IRQ + i)); } gpex_pcie_init(system_memory, @@ -1001,14 +1400,14 @@ static void virt_machine_init(MachineState *machine) virt_high_pcie_memmap.base, virt_high_pcie_memmap.size, memmap[VIRT_PCIE_PIO].base, - DEVICE(pcie_plic)); + DEVICE(pcie_irqchip)); serial_mm_init(system_memory, memmap[VIRT_UART0].base, - 0, qdev_get_gpio_in(DEVICE(mmio_plic), UART0_IRQ), 399193, + 0, qdev_get_gpio_in(DEVICE(mmio_irqchip), UART0_IRQ), 399193, serial_hd(0), DEVICE_LITTLE_ENDIAN); sysbus_create_simple("goldfish_rtc", memmap[VIRT_RTC].base, - qdev_get_gpio_in(DEVICE(mmio_plic), RTC_IRQ)); + qdev_get_gpio_in(DEVICE(mmio_irqchip), RTC_IRQ)); virt_flash_create(s); @@ -1024,6 +1423,64 @@ static void virt_machine_instance_init(Object *obj) { } +static char *virt_get_aia_guests(Object *obj, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + char val[32]; + + sprintf(val, "%d", s->aia_guests); + return g_strdup(val); +} + +static void virt_set_aia_guests(Object *obj, const char *val, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + + 
s->aia_guests = atoi(val); + if (s->aia_guests < 0 || s->aia_guests > VIRT_IRQCHIP_MAX_GUESTS) { + error_setg(errp, "Invalid number of AIA IMSIC guests"); + error_append_hint(errp, "Valid values be between 0 and %d.\n", + VIRT_IRQCHIP_MAX_GUESTS); + } +} + +static char *virt_get_aia(Object *obj, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + const char *val; + + switch (s->aia_type) { + case VIRT_AIA_TYPE_APLIC: + val = "aplic"; + break; + case VIRT_AIA_TYPE_APLIC_IMSIC: + val = "aplic-imsic"; + break; + default: + val = "none"; + break; + }; + + return g_strdup(val); +} + +static void virt_set_aia(Object *obj, const char *val, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + + if (!strcmp(val, "none")) { + s->aia_type = VIRT_AIA_TYPE_NONE; + } else if (!strcmp(val, "aplic")) { + s->aia_type = VIRT_AIA_TYPE_APLIC; + } else if (!strcmp(val, "aplic-imsic")) { + s->aia_type = VIRT_AIA_TYPE_APLIC_IMSIC; + } else { + error_setg(errp, "Invalid AIA interrupt controller type"); + error_append_hint(errp, "Valid values are none, aplic, and " + "aplic-imsic.\n"); + } +} + static bool virt_get_aclint(Object *obj, Error **errp) { MachineState *ms = MACHINE(obj); @@ -1042,6 +1499,7 @@ static void virt_set_aclint(Object *obj, bool value, Error **errp) static void virt_machine_class_init(ObjectClass *oc, void *data) { + char str[128]; MachineClass *mc = MACHINE_CLASS(oc); mc->desc = "RISC-V VirtIO board"; @@ -1062,6 +1520,20 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, "aclint", "Set on/off to enable/disable " "emulating ACLINT devices"); + + object_class_property_add_str(oc, "aia", virt_get_aia, + virt_set_aia); + object_class_property_set_description(oc, "aia", + "Set type of AIA interrupt " + "conttoller. 
Valid values are " + "none, aplic, and aplic-imsic."); + + object_class_property_add_str(oc, "aia-guests", + virt_get_aia_guests, + virt_set_aia_guests); + sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid value " + "should be between 0 and %d.", VIRT_IRQCHIP_MAX_GUESTS); + object_class_property_set_description(oc, "aia-guests", str); } static const TypeInfo virt_machine_typeinfo = { diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 3666b8d946..072686ed58 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -26,6 +26,7 @@ #include "qemu/main-loop.h" #include "qemu/module.h" #include "qemu/hw-version.h" +#include "qemu/memalign.h" #include "hw/scsi/scsi.h" #include "migration/qemu-file-types.h" #include "migration/vmstate.h" diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c index 6dbb9f41e4..c89ac53e65 100644 --- a/hw/tpm/tpm_ppi.c +++ b/hw/tpm/tpm_ppi.c @@ -12,7 +12,7 @@ */ #include "qemu/osdep.h" - +#include "qemu/memalign.h" #include "qapi/error.h" #include "sysemu/memory_mapping.h" #include "migration/vmstate.h" diff --git a/hw/usb/dev-mtp.c b/hw/usb/dev-mtp.c index 1e6ac76bef..e6b77a2a94 100644 --- a/hw/usb/dev-mtp.c +++ b/hw/usb/dev-mtp.c @@ -1607,7 +1607,7 @@ static void usb_mtp_write_data(MTPState *s, uint32_t handle) usb_mtp_object_lookup(s, s->dataset.parent_handle); char *path = NULL; uint64_t rc; - mode_t mask = 0644; + mode_t mask = 0755; int ret = 0; assert(d != NULL); @@ -1635,7 +1635,7 @@ static void usb_mtp_write_data(MTPState *s, uint32_t handle) } d->fd = open(path, O_CREAT | O_WRONLY | - O_CLOEXEC | O_NOFOLLOW, mask); + O_CLOEXEC | O_NOFOLLOW, mask & 0666); if (d->fd == -1) { ret = 1; goto done; diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index a93d6b2e98..895b29fb86 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -58,8 +58,6 @@ struct ohci_hcca { #define ED_WBACK_OFFSET offsetof(struct ohci_ed, head) #define ED_WBACK_SIZE 4 -static void ohci_async_cancel_device(OHCIState *ohci, USBDevice *dev); 
- /* Bitfields for the first word of an Endpoint Desciptor. */ #define OHCI_ED_FA_SHIFT 0 #define OHCI_ED_FA_MASK (0x7f<<OHCI_ED_FA_SHIFT) @@ -261,92 +259,6 @@ static inline void ohci_set_interrupt(OHCIState *ohci, uint32_t intr) ohci_intr_update(ohci); } -/* Attach or detach a device on a root hub port. */ -static void ohci_attach(USBPort *port1) -{ - OHCIState *s = port1->opaque; - OHCIPort *port = &s->rhport[port1->index]; - uint32_t old_state = port->ctrl; - - /* set connect status */ - port->ctrl |= OHCI_PORT_CCS | OHCI_PORT_CSC; - - /* update speed */ - if (port->port.dev->speed == USB_SPEED_LOW) { - port->ctrl |= OHCI_PORT_LSDA; - } else { - port->ctrl &= ~OHCI_PORT_LSDA; - } - - /* notify of remote-wakeup */ - if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) { - ohci_set_interrupt(s, OHCI_INTR_RD); - } - - trace_usb_ohci_port_attach(port1->index); - - if (old_state != port->ctrl) { - ohci_set_interrupt(s, OHCI_INTR_RHSC); - } -} - -static void ohci_detach(USBPort *port1) -{ - OHCIState *s = port1->opaque; - OHCIPort *port = &s->rhport[port1->index]; - uint32_t old_state = port->ctrl; - - ohci_async_cancel_device(s, port1->dev); - - /* set connect status */ - if (port->ctrl & OHCI_PORT_CCS) { - port->ctrl &= ~OHCI_PORT_CCS; - port->ctrl |= OHCI_PORT_CSC; - } - /* disable port */ - if (port->ctrl & OHCI_PORT_PES) { - port->ctrl &= ~OHCI_PORT_PES; - port->ctrl |= OHCI_PORT_PESC; - } - trace_usb_ohci_port_detach(port1->index); - - if (old_state != port->ctrl) { - ohci_set_interrupt(s, OHCI_INTR_RHSC); - } -} - -static void ohci_wakeup(USBPort *port1) -{ - OHCIState *s = port1->opaque; - OHCIPort *port = &s->rhport[port1->index]; - uint32_t intr = 0; - if (port->ctrl & OHCI_PORT_PSS) { - trace_usb_ohci_port_wakeup(port1->index); - port->ctrl |= OHCI_PORT_PSSC; - port->ctrl &= ~OHCI_PORT_PSS; - intr = OHCI_INTR_RHSC; - } - /* Note that the controller can be suspended even if this port is not */ - if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) { - 
trace_usb_ohci_remote_wakeup(s->name); - /* This is the one state transition the controller can do by itself */ - s->ctl &= ~OHCI_CTL_HCFS; - s->ctl |= OHCI_USB_RESUME; - /* In suspend mode only ResumeDetected is possible, not RHSC: - * see the OHCI spec 5.1.2.3. - */ - intr = OHCI_INTR_RD; - } - ohci_set_interrupt(s, intr); -} - -static void ohci_child_detach(USBPort *port1, USBDevice *child) -{ - OHCIState *s = port1->opaque; - - ohci_async_cancel_device(s, child); -} - static USBDevice *ohci_find_device(OHCIState *ohci, uint8_t addr) { USBDevice *dev; @@ -369,6 +281,10 @@ void ohci_stop_endpoints(OHCIState *ohci) USBDevice *dev; int i, j; + if (ohci->async_td) { + usb_cancel_packet(&ohci->usb_packet); + ohci->async_td = 0; + } for (i = 0; i < ohci->num_ports; i++) { dev = ohci->rhport[i].port.dev; if (dev && dev->attached) { @@ -398,10 +314,6 @@ static void ohci_roothub_reset(OHCIState *ohci) usb_port_reset(&port->port); } } - if (ohci->async_td) { - usb_cancel_packet(&ohci->usb_packet); - ohci->async_td = 0; - } ohci_stop_endpoints(ohci); } @@ -634,21 +546,9 @@ static int ohci_copy_iso_td(OHCIState *ohci, return 0; } -static void ohci_process_lists(OHCIState *ohci, int completion); - -static void ohci_async_complete_packet(USBPort *port, USBPacket *packet) -{ - OHCIState *ohci = container_of(packet, OHCIState, usb_packet); - - trace_usb_ohci_async_complete(); - ohci->async_complete = true; - ohci_process_lists(ohci, 1); -} - #define USUB(a, b) ((int16_t)((uint16_t)(a) - (uint16_t)(b))) -static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, - int completion) +static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed) { int dir; size_t len = 0; @@ -658,6 +558,9 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, int i; USBDevice *dev; USBEndpoint *ep; + USBPacket *pkt; + uint8_t buf[8192]; + bool int_req; struct ohci_iso_td iso_td; uint32_t addr; uint16_t starting_frame; @@ -792,40 +695,42 @@ static int 
ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, } else { len = end_addr - start_addr + 1; } - if (len > sizeof(ohci->usb_buf)) { - len = sizeof(ohci->usb_buf); + if (len > sizeof(buf)) { + len = sizeof(buf); } if (len && dir != OHCI_TD_DIR_IN) { - if (ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, len, + if (ohci_copy_iso_td(ohci, start_addr, end_addr, buf, len, DMA_DIRECTION_TO_DEVICE)) { ohci_die(ohci); return 1; } } - if (!completion) { - bool int_req = relative_frame_number == frame_count && - OHCI_BM(iso_td.flags, TD_DI) == 0; - dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA)); - if (dev == NULL) { - trace_usb_ohci_td_dev_error(); - return 1; - } - ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN)); - usb_packet_setup(&ohci->usb_packet, pid, ep, 0, addr, false, int_req); - usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, len); - usb_handle_packet(dev, &ohci->usb_packet); - if (ohci->usb_packet.status == USB_RET_ASYNC) { - usb_device_flush_ep_queue(dev, ep); - return 1; - } + dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA)); + if (dev == NULL) { + trace_usb_ohci_td_dev_error(); + return 1; } - if (ohci->usb_packet.status == USB_RET_SUCCESS) { - ret = ohci->usb_packet.actual_length; + ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN)); + pkt = g_new0(USBPacket, 1); + usb_packet_init(pkt); + int_req = relative_frame_number == frame_count && + OHCI_BM(iso_td.flags, TD_DI) == 0; + usb_packet_setup(pkt, pid, ep, 0, addr, false, int_req); + usb_packet_addbuf(pkt, buf, len); + usb_handle_packet(dev, pkt); + if (pkt->status == USB_RET_ASYNC) { + usb_device_flush_ep_queue(dev, ep); + g_free(pkt); + return 1; + } + if (pkt->status == USB_RET_SUCCESS) { + ret = pkt->actual_length; } else { - ret = ohci->usb_packet.status; + ret = pkt->status; } + g_free(pkt); trace_usb_ohci_iso_td_so(start_offset, end_offset, start_addr, end_addr, str, len, ret); @@ -833,7 +738,7 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed 
*ed, /* Writeback */ if (dir == OHCI_TD_DIR_IN && ret >= 0 && ret <= len) { /* IN transfer succeeded */ - if (ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, ret, + if (ohci_copy_iso_td(ohci, start_addr, end_addr, buf, ret, DMA_DIRECTION_FROM_DEVICE)) { ohci_die(ohci); return 1; @@ -1033,21 +938,21 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed) ohci->async_td = 0; ohci->async_complete = false; } else { + dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA)); + if (dev == NULL) { + trace_usb_ohci_td_dev_error(); + return 1; + } + ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN)); if (ohci->async_td) { /* ??? The hardware should allow one active packet per endpoint. We only allow one active packet per controller. This should be sufficient as long as devices respond in a timely manner. */ - trace_usb_ohci_td_too_many_pending(); + trace_usb_ohci_td_too_many_pending(ep->nr); return 1; } - dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA)); - if (dev == NULL) { - trace_usb_ohci_td_dev_error(); - return 1; - } - ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN)); usb_packet_setup(&ohci->usb_packet, pid, ep, 0, addr, !flag_r, OHCI_BM(td.flags, TD_DI) == 0); usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, pktlen); @@ -1156,7 +1061,7 @@ exit_no_retire: } /* Service an endpoint list. Returns nonzero if active TD were found. */ -static int ohci_service_ed_list(OHCIState *ohci, uint32_t head, int completion) +static int ohci_service_ed_list(OHCIState *ohci, uint32_t head) { struct ohci_ed ed; uint32_t next_ed; @@ -1207,8 +1112,9 @@ static int ohci_service_ed_list(OHCIState *ohci, uint32_t head, int completion) break; } else { /* Handle isochronous endpoints */ - if (ohci_service_iso_td(ohci, &ed, completion)) + if (ohci_service_iso_td(ohci, &ed)) { break; + } } } @@ -1235,20 +1141,20 @@ static void ohci_sof(OHCIState *ohci) } /* Process Control and Bulk lists. 
*/ -static void ohci_process_lists(OHCIState *ohci, int completion) +static void ohci_process_lists(OHCIState *ohci) { if ((ohci->ctl & OHCI_CTL_CLE) && (ohci->status & OHCI_STATUS_CLF)) { if (ohci->ctrl_cur && ohci->ctrl_cur != ohci->ctrl_head) { trace_usb_ohci_process_lists(ohci->ctrl_head, ohci->ctrl_cur); } - if (!ohci_service_ed_list(ohci, ohci->ctrl_head, completion)) { + if (!ohci_service_ed_list(ohci, ohci->ctrl_head)) { ohci->ctrl_cur = 0; ohci->status &= ~OHCI_STATUS_CLF; } } if ((ohci->ctl & OHCI_CTL_BLE) && (ohci->status & OHCI_STATUS_BLF)) { - if (!ohci_service_ed_list(ohci, ohci->bulk_head, completion)) { + if (!ohci_service_ed_list(ohci, ohci->bulk_head)) { ohci->bulk_cur = 0; ohci->status &= ~OHCI_STATUS_BLF; } @@ -1272,19 +1178,15 @@ static void ohci_frame_boundary(void *opaque) int n; n = ohci->frame_number & 0x1f; - ohci_service_ed_list(ohci, le32_to_cpu(hcca.intr[n]), 0); + ohci_service_ed_list(ohci, le32_to_cpu(hcca.intr[n])); } /* Cancel all pending packets if either of the lists has been disabled. 
*/ if (ohci->old_ctl & (~ohci->ctl) & (OHCI_CTL_BLE | OHCI_CTL_CLE)) { - if (ohci->async_td) { - usb_cancel_packet(&ohci->usb_packet); - ohci->async_td = 0; - } ohci_stop_endpoints(ohci); } ohci->old_ctl = ohci->ctl; - ohci_process_lists(ohci, 0); + ohci_process_lists(ohci); /* Stop if UnrecoverableError happened or ohci_sof will crash */ if (ohci->intr_status & OHCI_INTR_UE) { @@ -1793,8 +1695,45 @@ static void ohci_mem_write(void *opaque, } } -static void ohci_async_cancel_device(OHCIState *ohci, USBDevice *dev) +static const MemoryRegionOps ohci_mem_ops = { + .read = ohci_mem_read, + .write = ohci_mem_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +/* USBPortOps */ +static void ohci_attach(USBPort *port1) { + OHCIState *s = port1->opaque; + OHCIPort *port = &s->rhport[port1->index]; + uint32_t old_state = port->ctrl; + + /* set connect status */ + port->ctrl |= OHCI_PORT_CCS | OHCI_PORT_CSC; + + /* update speed */ + if (port->port.dev->speed == USB_SPEED_LOW) { + port->ctrl |= OHCI_PORT_LSDA; + } else { + port->ctrl &= ~OHCI_PORT_LSDA; + } + + /* notify of remote-wakeup */ + if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) { + ohci_set_interrupt(s, OHCI_INTR_RD); + } + + trace_usb_ohci_port_attach(port1->index); + + if (old_state != port->ctrl) { + ohci_set_interrupt(s, OHCI_INTR_RHSC); + } +} + +static void ohci_child_detach(USBPort *port1, USBDevice *dev) +{ + OHCIState *ohci = port1->opaque; + if (ohci->async_td && usb_packet_is_inflight(&ohci->usb_packet) && ohci->usb_packet.ep->dev == dev) { @@ -1803,11 +1742,65 @@ static void ohci_async_cancel_device(OHCIState *ohci, USBDevice *dev) } } -static const MemoryRegionOps ohci_mem_ops = { - .read = ohci_mem_read, - .write = ohci_mem_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; +static void ohci_detach(USBPort *port1) +{ + OHCIState *s = port1->opaque; + OHCIPort *port = &s->rhport[port1->index]; + uint32_t old_state = port->ctrl; + + ohci_child_detach(port1, port1->dev); + + /* set connect status */ + 
if (port->ctrl & OHCI_PORT_CCS) { + port->ctrl &= ~OHCI_PORT_CCS; + port->ctrl |= OHCI_PORT_CSC; + } + /* disable port */ + if (port->ctrl & OHCI_PORT_PES) { + port->ctrl &= ~OHCI_PORT_PES; + port->ctrl |= OHCI_PORT_PESC; + } + trace_usb_ohci_port_detach(port1->index); + + if (old_state != port->ctrl) { + ohci_set_interrupt(s, OHCI_INTR_RHSC); + } +} + +static void ohci_wakeup(USBPort *port1) +{ + OHCIState *s = port1->opaque; + OHCIPort *port = &s->rhport[port1->index]; + uint32_t intr = 0; + if (port->ctrl & OHCI_PORT_PSS) { + trace_usb_ohci_port_wakeup(port1->index); + port->ctrl |= OHCI_PORT_PSSC; + port->ctrl &= ~OHCI_PORT_PSS; + intr = OHCI_INTR_RHSC; + } + /* Note that the controller can be suspended even if this port is not */ + if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) { + trace_usb_ohci_remote_wakeup(s->name); + /* This is the one state transition the controller can do by itself */ + s->ctl &= ~OHCI_CTL_HCFS; + s->ctl |= OHCI_USB_RESUME; + /* + * In suspend mode only ResumeDetected is possible, not RHSC: + * see the OHCI spec 5.1.2.3. 
+ */ + intr = OHCI_INTR_RD; + } + ohci_set_interrupt(s, intr); +} + +static void ohci_async_complete_packet(USBPort *port, USBPacket *packet) +{ + OHCIState *ohci = container_of(packet, OHCIState, usb_packet); + + trace_usb_ohci_async_complete(); + ohci->async_complete = true; + ohci_process_lists(ohci); +} static USBPortOps ohci_port_ops = { .attach = ohci_attach, diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 14bdb89676..0cd0a5e540 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -2523,7 +2523,7 @@ static void xhci_process_commands(XHCIState *xhci) case CR_VENDOR_NEC_FIRMWARE_REVISION: if (xhci->nec_quirks) { event.type = 48; /* NEC reply */ - event.length = 0x3025; + event.length = 0x3034; } else { event.ccode = CC_TRB_ERROR; } diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index 5f0ef9cb3b..8692ea2561 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -1239,7 +1239,11 @@ static void usbredir_create_parser(USBRedirDevice *dev) DPRINTF("creating usbredirparser\n"); - dev->parser = qemu_oom_check(usbredirparser_create()); + dev->parser = usbredirparser_create(); + if (!dev->parser) { + error_report("usbredirparser_create() failed"); + exit(1); + } dev->parser->priv = dev; dev->parser->log_func = usbredir_log; dev->parser->read_func = usbredir_read; @@ -2239,7 +2243,10 @@ static int usbredir_put_parser(QEMUFile *f, void *priv, size_t unused, } usbredirparser_serialize(dev->parser, &data, &len); - qemu_oom_check(data); + if (!data) { + error_report("usbredirparser_serialize failed"); + exit(1); + } qemu_put_be32(f, len); qemu_put_buffer(f, data, len); @@ -2330,7 +2337,11 @@ static int usbredir_get_bufpq(QEMUFile *f, void *priv, size_t unused, bufp->len = qemu_get_be32(f); bufp->status = qemu_get_be32(f); bufp->offset = 0; - bufp->data = qemu_oom_check(malloc(bufp->len)); /* regular malloc! */ + bufp->data = malloc(bufp->len); /* regular malloc! 
*/ + if (!bufp->data) { + error_report("usbredir_get_bufpq: out of memory"); + exit(1); + } bufp->free_on_destroy = bufp->data; qemu_get_buffer(f, bufp->data, bufp->len); QTAILQ_INSERT_TAIL(&endp->bufpq, bufp, next); diff --git a/hw/usb/trace-events b/hw/usb/trace-events index b8287b63f1..9773cb5330 100644 --- a/hw/usb/trace-events +++ b/hw/usb/trace-events @@ -51,7 +51,7 @@ usb_ohci_td_skip_async(void) "" usb_ohci_td_pkt_hdr(uint32_t addr, int64_t pktlen, int64_t len, const char *s, int flag_r, uint32_t cbp, uint32_t be) " TD @ 0x%.8x %" PRId64 " of %" PRId64 " bytes %s r=%d cbp=0x%.8x be=0x%.8x" usb_ohci_td_pkt_short(const char *dir, const char *buf) "%s data: %s" usb_ohci_td_pkt_full(const char *dir, const char *buf) "%s data: %s" -usb_ohci_td_too_many_pending(void) "" +usb_ohci_td_too_many_pending(int ep) "ep=%d" usb_ohci_td_packet_status(int status) "status=%d" usb_ohci_ed_read_error(uint32_t addr) "ED read error at 0x%x" usb_ohci_ed_pkt(uint32_t cur, int h, int c, uint32_t head, uint32_t tail, uint32_t next) "ED @ 0x%.8x h=%u c=%u\n head=0x%.8x tailp=0x%.8x next=0x%.8x" diff --git a/include/block/block-common.h b/include/block/block-common.h new file mode 100644 index 0000000000..fdb7306e78 --- /dev/null +++ b/include/block/block-common.h @@ -0,0 +1,419 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_COMMON_H +#define BLOCK_COMMON_H + +#include "block/aio.h" +#include "block/aio-wait.h" +#include "qemu/iov.h" +#include "qemu/coroutine.h" +#include "block/accounting.h" +#include "block/dirty-bitmap.h" +#include "block/blockjob.h" +#include "qemu/hbitmap.h" +#include "qemu/transactions.h" + +/* + * generated_co_wrapper + * + * Function specifier, which does nothing but mark functions to be + * generated by scripts/block-coroutine-wrapper.py + * + * Read more in docs/devel/block-coroutine-wrapper.rst + */ +#define generated_co_wrapper + +/* block.c */ +typedef struct BlockDriver BlockDriver; +typedef struct BdrvChild BdrvChild; +typedef struct BdrvChildClass BdrvChildClass; + +typedef struct BlockDriverInfo { + /* in bytes, 0 if irrelevant */ + int cluster_size; + /* offset at which the VM state can be saved (0 if not possible) */ + int64_t vm_state_offset; + bool is_dirty; + /* + * True if this block driver only supports compressed writes + */ + bool needs_compressed_writes; +} BlockDriverInfo; + +typedef struct BlockFragInfo { + uint64_t allocated_clusters; + uint64_t total_clusters; + uint64_t fragmented_clusters; + uint64_t compressed_clusters; +} BlockFragInfo; + +typedef enum { + BDRV_REQ_COPY_ON_READ = 0x1, + BDRV_REQ_ZERO_WRITE = 0x2, + + /* + * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate + * that the block driver should unmap (discard) blocks if it is guaranteed + * that the result will read back as zeroes. 
The flag is only passed to the + * driver if the block device is opened with BDRV_O_UNMAP. + */ + BDRV_REQ_MAY_UNMAP = 0x4, + + BDRV_REQ_FUA = 0x10, + BDRV_REQ_WRITE_COMPRESSED = 0x20, + + /* + * Signifies that this write request will not change the visible disk + * content. + */ + BDRV_REQ_WRITE_UNCHANGED = 0x40, + + /* + * Forces request serialisation. Use only with write requests. + */ + BDRV_REQ_SERIALISING = 0x80, + + /* + * Execute the request only if the operation can be offloaded or otherwise + * be executed efficiently, but return an error instead of using a slow + * fallback. + */ + BDRV_REQ_NO_FALLBACK = 0x100, + + /* + * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read + * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR + * filter is involved), in which case it signals that the COR operation + * need not read the data into memory (qiov) but only ensure they are + * copied to the top layer (i.e., that COR operation is done). + */ + BDRV_REQ_PREFETCH = 0x200, + + /* + * If we need to wait for other requests, just fail immediately. Used + * only together with BDRV_REQ_SERIALISING. Used only with requests aligned + * to request_alignment (corresponding assertions are in block/io.c). 
+ */ + BDRV_REQ_NO_WAIT = 0x400, + + /* Mask of valid flags */ + BDRV_REQ_MASK = 0x7ff, +} BdrvRequestFlags; + +#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */ +#define BDRV_O_RDWR 0x0002 +#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ +#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save + writes in a snapshot */ +#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ +#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ +#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the + thread pool */ +#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */ +#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */ +#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */ +#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */ +#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */ +#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */ +#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */ +#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given: + select an appropriate protocol driver, + ignoring the format layer */ +#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */ +#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening + read-write fails */ +#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */ + +#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH) + + +/* Option names of options parsed by the block layer */ + +#define BDRV_OPT_CACHE_WB "cache.writeback" +#define BDRV_OPT_CACHE_DIRECT "cache.direct" +#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" +#define BDRV_OPT_READ_ONLY "read-only" +#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only" +#define BDRV_OPT_DISCARD "discard" +#define BDRV_OPT_FORCE_SHARE "force-share" + + +#define BDRV_SECTOR_BITS 9 +#define BDRV_SECTOR_SIZE (1ULL << 
BDRV_SECTOR_BITS) + +#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \ + INT_MAX >> BDRV_SECTOR_BITS) +#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) + +/* + * We want to allow aligning requests and disk length up to any 32bit alignment + * and not be afraid of overflow. + * To achieve it, and at the same time use some pretty number as maximum disk + * size, let's define maximum "length" (a limit for any offset/bytes request and + * for disk size) to be the greatest multiple of BDRV_MAX_ALIGNMENT that does + * not exceed INT64_MAX. + */ +#define BDRV_MAX_ALIGNMENT (1L << 30) +#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT)) + +/* + * Allocation status flags for bdrv_block_status() and friends. + * + * Public flags: + * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer + * BDRV_BLOCK_ZERO: offset reads as zero + * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data + * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this + * layer rather than any backing, set by block layer + * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this + * layer, set by block layer + * + * Internal flags: + * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request + * that the block layer recompute the answer from the returned + * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. + * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for + * zeroes in file child of current block node inside + * returned region. Only valid together with both + * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not + * appear with BDRV_BLOCK_ZERO. + * + * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the + * host offset within the returned BDS that is allocated for the + * corresponding raw guest data. 
However, whether that offset + * actually contains data also depends on BDRV_BLOCK_DATA, as follows: + * + * DATA ZERO OFFSET_VALID + * t t t sectors read as zero, returned file is zero at offset + * t f t sectors read as valid from file at offset + * f t t sectors preallocated, read as zero, returned file not + * necessarily zero at offset + * f f t sectors preallocated but read from backing_hd, + * returned file contains garbage at offset + * t t f sectors preallocated, read as zero, unknown offset + * t f f sectors read from unknown file or offset + * f t f not allocated or unknown offset, read as zero + * f f f not allocated or unknown offset, read from backing_hd + */ +#define BDRV_BLOCK_DATA 0x01 +#define BDRV_BLOCK_ZERO 0x02 +#define BDRV_BLOCK_OFFSET_VALID 0x04 +#define BDRV_BLOCK_RAW 0x08 +#define BDRV_BLOCK_ALLOCATED 0x10 +#define BDRV_BLOCK_EOF 0x20 +#define BDRV_BLOCK_RECURSE 0x40 + +typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; + +typedef struct BDRVReopenState { + BlockDriverState *bs; + int flags; + BlockdevDetectZeroesOptions detect_zeroes; + bool backing_missing; + BlockDriverState *old_backing_bs; /* keep pointer for permissions update */ + BlockDriverState *old_file_bs; /* keep pointer for permissions update */ + QDict *options; + QDict *explicit_options; + void *opaque; +} BDRVReopenState; + +/* + * Block operation types + */ +typedef enum BlockOpType { + BLOCK_OP_TYPE_BACKUP_SOURCE, + BLOCK_OP_TYPE_BACKUP_TARGET, + BLOCK_OP_TYPE_CHANGE, + BLOCK_OP_TYPE_COMMIT_SOURCE, + BLOCK_OP_TYPE_COMMIT_TARGET, + BLOCK_OP_TYPE_DATAPLANE, + BLOCK_OP_TYPE_DRIVE_DEL, + BLOCK_OP_TYPE_EJECT, + BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, + BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, + BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, + BLOCK_OP_TYPE_MIRROR_SOURCE, + BLOCK_OP_TYPE_MIRROR_TARGET, + BLOCK_OP_TYPE_RESIZE, + BLOCK_OP_TYPE_STREAM, + BLOCK_OP_TYPE_REPLACE, + BLOCK_OP_TYPE_MAX, +} BlockOpType; + +/* Block node permission constants */ +enum { + /** + * A 
user that has the "permission" of consistent reads is guaranteed that + * their view of the contents of the block device is complete and + * self-consistent, representing the contents of a disk at a specific + * point. + * + * For most block devices (including their backing files) this is true, but + * the property cannot be maintained in a few situations like for + * intermediate nodes of a commit block job. + */ + BLK_PERM_CONSISTENT_READ = 0x01, + + /** This permission is required to change the visible disk contents. */ + BLK_PERM_WRITE = 0x02, + + /** + * This permission (which is weaker than BLK_PERM_WRITE) is both enough and + * required for writes to the block node when the caller promises that + * the visible disk content doesn't change. + * + * As the BLK_PERM_WRITE permission is strictly stronger, either is + * sufficient to perform an unchanging write. + */ + BLK_PERM_WRITE_UNCHANGED = 0x04, + + /** This permission is required to change the size of a block node. */ + BLK_PERM_RESIZE = 0x08, + + /** + * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU + * 6.1 and earlier may still lock the corresponding byte in block/file-posix + * locking. So, implementing some new permission should be very careful to + * not interfere with this old unused thing. + */ + + BLK_PERM_ALL = 0x0f, + + DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_WRITE_UNCHANGED + | BLK_PERM_RESIZE, + + DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH, +}; + +/* + * Flags that parent nodes assign to child nodes to specify what kind of + * role(s) they take. + * + * At least one of DATA, METADATA, FILTERED, or COW must be set for + * every child. + */ +enum BdrvChildRoleBits { + /* + * This child stores data. + * Any node may have an arbitrary number of such children. + */ + BDRV_CHILD_DATA = (1 << 0), + + /* + * This child stores metadata. + * Any node may have an arbitrary number of metadata-storing + * children. 
+ */ + BDRV_CHILD_METADATA = (1 << 1), + + /* + * A child that always presents exactly the same visible data as + * the parent, e.g. by virtue of the parent forwarding all reads + * and writes. + * This flag is mutually exclusive with DATA, METADATA, and COW. + * Any node may have at most one filtered child at a time. + */ + BDRV_CHILD_FILTERED = (1 << 2), + + /* + * Child from which to read all data that isn't allocated in the + * parent (i.e., the backing child); such data is copied to the + * parent through COW (and optionally COR). + * This field is mutually exclusive with DATA, METADATA, and + * FILTERED. + * Any node may have at most one such backing child at a time. + */ + BDRV_CHILD_COW = (1 << 3), + + /* + * The primary child. For most drivers, this is the child whose + * filename applies best to the parent node. + * Any node may have at most one primary child at a time. + */ + BDRV_CHILD_PRIMARY = (1 << 4), + + /* Useful combination of flags */ + BDRV_CHILD_IMAGE = BDRV_CHILD_DATA + | BDRV_CHILD_METADATA + | BDRV_CHILD_PRIMARY, +}; + +/* Mask of BdrvChildRoleBits values */ +typedef unsigned int BdrvChildRole; + +typedef struct BdrvCheckResult { + int corruptions; + int leaks; + int check_errors; + int corruptions_fixed; + int leaks_fixed; + int64_t image_end_offset; + BlockFragInfo bfi; +} BdrvCheckResult; + +typedef enum { + BDRV_FIX_LEAKS = 1, + BDRV_FIX_ERRORS = 2, +} BdrvCheckMode; + +typedef struct BlockSizes { + uint32_t phys; + uint32_t log; +} BlockSizes; + +typedef struct HDGeometry { + uint32_t heads; + uint32_t sectors; + uint32_t cylinders; +} HDGeometry; + +/* + * Common functions that are neither I/O nor Global State. + * + * These functions must never call any function from other categories + * (I/O, "I/O or GS", Global State) except this one, but can be invoked by + * all of them. 
+ */ + +char *bdrv_perm_names(uint64_t perm); +uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm); + +void bdrv_init_with_whitelist(void); +bool bdrv_uses_whitelist(void); +int bdrv_is_whitelisted(BlockDriver *drv, bool read_only); + +int bdrv_parse_aio(const char *mode, int *flags); +int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough); +int bdrv_parse_discard_flags(const char *mode, int *flags); + +int path_has_protocol(const char *path); +int path_is_absolute(const char *path); +char *path_combine(const char *base_path, const char *filename); + +char *bdrv_get_full_backing_filename_from_filename(const char *backed, + const char *backing, + Error **errp); + +#endif /* BLOCK_COMMON_H */ diff --git a/include/block/block-copy.h b/include/block/block-copy.h index 99370fa38b..68bbd344b2 100644 --- a/include/block/block-copy.h +++ b/include/block/block-copy.h @@ -25,6 +25,7 @@ typedef struct BlockCopyState BlockCopyState; typedef struct BlockCopyCallState BlockCopyCallState; BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, + const BdrvDirtyBitmap *bitmap, Error **errp); /* Function should be called prior any actual copy request */ @@ -34,6 +35,7 @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm); void block_copy_state_free(BlockCopyState *s); +void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes); int64_t block_copy_reset_unallocated(BlockCopyState *s, int64_t offset, int64_t *count); diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h new file mode 100644 index 0000000000..25bb69bbef --- /dev/null +++ b/include/block/block-global-state.h @@ -0,0 +1,253 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without 
restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_GLOBAL_STATE_H +#define BLOCK_GLOBAL_STATE_H + +#include "block-common.h" + +/* + * Global state (GS) API. These functions run under the BQL. + * + * If a function modifies the graph, it also uses drain and/or + * aio_context_acquire/release to be sure it has unique access. + * aio_context locking is needed together with BQL because of + * the thread-safe I/O API that concurrently runs and accesses + * the graph without the BQL. + * + * It is important to note that not all of these functions are + * necessarily limited to running under the BQL, but they would + * require additional auditing and many small thread-safety changes + * to move them into the I/O API. Often it's not worth doing that + * work since the APIs are only used with the BQL held at the + * moment, so they have been placed in the GS API (for now). + * + * These functions can call any function from this and other categories + * (I/O, "I/O or GS", Common), but must be invoked only by other GS APIs. 
+ * + * All functions in this header must use the macro + * GLOBAL_STATE_CODE(); + * to catch when they are accidentally called without the BQL. + */ + +void bdrv_init(void); +BlockDriver *bdrv_find_protocol(const char *filename, + bool allow_protocol_prefix, + Error **errp); +BlockDriver *bdrv_find_format(const char *format_name); +int bdrv_create(BlockDriver *drv, const char* filename, + QemuOpts *opts, Error **errp); +int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); + +BlockDriverState *bdrv_new(void); +int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp); +int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, + Error **errp); +int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, + Error **errp); +BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, + int flags, Error **errp); +int bdrv_drop_filter(BlockDriverState *bs, Error **errp); + +BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState *parent, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + bool allow_none, Error **errp); +BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); +int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp); +int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + const char *bdref_key, Error **errp); +BlockDriverState *bdrv_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp); +BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, + const char *node_name, + QDict *options, int flags, + Error **errp); +BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, + int flags, Error **errp); +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, QDict *options, + bool keep_old_opts); +void bdrv_reopen_queue_free(BlockReopenQueue 
*bs_queue); +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); +int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + Error **errp); +int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, + Error **errp); +BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, + const char *backing_file); +void bdrv_refresh_filename(BlockDriverState *bs); +void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp); +int bdrv_commit(BlockDriverState *bs); +int bdrv_make_empty(BdrvChild *c, Error **errp); +int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, + const char *backing_fmt, bool warn); +void bdrv_register(BlockDriver *bdrv); +int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + const char *backing_file_str); +BlockDriverState *bdrv_find_overlay(BlockDriverState *active, + BlockDriverState *bs); +BlockDriverState *bdrv_find_base(BlockDriverState *bs); +bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, + Error **errp); +int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, + Error **errp); +void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base); + +/* + * The units of offset and total_work_size may be chosen arbitrarily by the + * block driver; total_work_size may change during the course of the amendment + * operation + */ +typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset, + int64_t total_work_size, void *opaque); +int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + bool force, + Error **errp); + +/* check if a named node can be replaced when doing drive-mirror */ +BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, + const char *node_name, Error **errp); + +int bdrv_activate(BlockDriverState *bs, Error **errp); +void bdrv_activate_all(Error **errp); +int 
bdrv_inactivate_all(void); + +int bdrv_flush_all(void); +void bdrv_close_all(void); +void bdrv_drain_all_begin(void); +void bdrv_drain_all_end(void); +void bdrv_drain_all(void); + +int bdrv_has_zero_init_1(BlockDriverState *bs); +int bdrv_has_zero_init(BlockDriverState *bs); +BlockDriverState *bdrv_find_node(const char *node_name); +BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp); +XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp); +BlockDriverState *bdrv_lookup_bs(const char *device, + const char *node_name, + Error **errp); +bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base); +BlockDriverState *bdrv_next_node(BlockDriverState *bs); +BlockDriverState *bdrv_next_all_states(BlockDriverState *bs); + +typedef struct BdrvNextIterator { + enum { + BDRV_NEXT_BACKEND_ROOTS, + BDRV_NEXT_MONITOR_OWNED, + } phase; + BlockBackend *blk; + BlockDriverState *bs; +} BdrvNextIterator; + +BlockDriverState *bdrv_first(BdrvNextIterator *it); +BlockDriverState *bdrv_next(BdrvNextIterator *it); +void bdrv_next_cleanup(BdrvNextIterator *it); + +BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs); +void bdrv_iterate_format(void (*it)(void *opaque, const char *name), + void *opaque, bool read_only); +int bdrv_get_flags(BlockDriverState *bs); +char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp); +char *bdrv_dirname(BlockDriverState *bs, Error **errp); + +void bdrv_img_create(const char *filename, const char *fmt, + const char *base_filename, const char *base_fmt, + char *options, uint64_t img_size, int flags, + bool quiet, Error **errp); + +void bdrv_ref(BlockDriverState *bs); +void bdrv_unref(BlockDriverState *bs); +void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + Error **errp); + +bool 
bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason); +void bdrv_op_block_all(BlockDriverState *bs, Error *reason); +void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason); +bool bdrv_op_blocker_is_empty(BlockDriverState *bs); + +int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag); +int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag); +int bdrv_debug_resume(BlockDriverState *bs, const char *tag); +bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); + +/** + * Locks the AioContext of @bs if it's not the current AioContext. This avoids + * double locking which could lead to deadlocks: This is a coroutine_fn, so we + * know we already own the lock of the current AioContext. + * + * May only be called in the main thread. + */ +void coroutine_fn bdrv_co_lock(BlockDriverState *bs); + +/** + * Unlocks the AioContext of @bs if it's not the current AioContext. 
+ */ +void coroutine_fn bdrv_co_unlock(BlockDriverState *bs); + +void bdrv_set_aio_context_ignore(BlockDriverState *bs, + AioContext *new_context, GSList **ignore); +int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + Error **errp); +int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp); +bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, + GSList **ignore, Error **errp); +bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, + GSList **ignore, Error **errp); +AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c); + +int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); +int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); + +void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child, + Error **errp); +void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp); + +/** + * + * bdrv_register_buf/bdrv_unregister_buf: + * + * Register/unregister a buffer for I/O. For example, VFIO drivers are + * interested to know the memory areas that would later be used for I/O, so + * that they can prepare IOMMU mapping etc., to get better performance. 
+ */ +void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size); +void bdrv_unregister_buf(BlockDriverState *bs, void *host); + +void bdrv_cancel_in_flight(BlockDriverState *bs); + +#endif /* BLOCK_GLOBAL_STATE_H */ diff --git a/include/block/block-io.h b/include/block/block-io.h new file mode 100644 index 0000000000..5e3f346806 --- /dev/null +++ b/include/block/block-io.h @@ -0,0 +1,368 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_IO_H +#define BLOCK_IO_H + +#include "block-common.h" + +/* + * I/O API functions. These functions are thread-safe, and therefore + * can run in any thread as long as the thread has called + * aio_context_acquire/release(). + * + * These functions can only call functions from I/O and Common categories, + * but can be invoked by GS, "I/O or GS" and I/O APIs. 
+ * + * All functions in this category must use the macro + * IO_CODE(); + * to catch when they are accidentally called by the wrong API. + */ + +int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, + int64_t bytes, BdrvRequestFlags flags); +int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); +int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes); +int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, + int64_t bytes); +int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, + const void *buf, int64_t bytes); +/* + * Efficiently zero a region of the disk image. Note that this is a regular + * I/O request like read or write and should have a reasonable size. This + * function is not suitable for zeroing the entire image in a single request + * because it may allocate memory for the entire region. + */ +int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, + int64_t bytes, BdrvRequestFlags flags); + +int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, + Error **errp); + +int64_t bdrv_nb_sectors(BlockDriverState *bs); +int64_t bdrv_getlength(BlockDriverState *bs); +int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); +BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, + BlockDriverState *in_bs, Error **errp); +void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); +int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp); +void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs); + + +/* async block I/O */ +void bdrv_aio_cancel(BlockAIOCB *acb); +void bdrv_aio_cancel_async(BlockAIOCB *acb); + +/* sg packet commands */ +int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf); + +/* Ensure contents are flushed to disk. 
*/ +int coroutine_fn bdrv_co_flush(BlockDriverState *bs); + +int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); +bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); +int bdrv_block_status(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum, int64_t *map, + BlockDriverState **file); +int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); +int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum); +int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, + bool include_base, int64_t offset, int64_t bytes, + int64_t *pnum); +int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, + int64_t bytes); + +int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, + bool ignore_allow_rdw, Error **errp); +int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, + Error **errp); +bool bdrv_is_read_only(BlockDriverState *bs); +bool bdrv_is_writable(BlockDriverState *bs); +bool bdrv_is_sg(BlockDriverState *bs); +bool bdrv_is_inserted(BlockDriverState *bs); +void bdrv_lock_medium(BlockDriverState *bs, bool locked); +void bdrv_eject(BlockDriverState *bs, bool eject_flag); +const char *bdrv_get_format_name(BlockDriverState *bs); + +bool bdrv_supports_compressed_writes(BlockDriverState *bs); +const char *bdrv_get_node_name(const BlockDriverState *bs); +const char *bdrv_get_device_name(const BlockDriverState *bs); +const char *bdrv_get_device_or_node_name(const BlockDriverState *bs); +int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); +ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, + Error **errp); +BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs); +void bdrv_round_to_clusters(BlockDriverState *bs, + int64_t offset, int64_t bytes, + int64_t *cluster_offset, + int64_t *cluster_bytes); + +void 
bdrv_get_backing_filename(BlockDriverState *bs, + char *filename, int filename_size); + +int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, + int64_t pos, int size); + +int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, + int64_t pos, int size); + +/* + * Returns the alignment in bytes that is required so that no bounce buffer + * is required throughout the stack + */ +size_t bdrv_min_mem_align(BlockDriverState *bs); +/* Returns optimal alignment in bytes for bounce buffer */ +size_t bdrv_opt_mem_align(BlockDriverState *bs); +void *qemu_blockalign(BlockDriverState *bs, size_t size); +void *qemu_blockalign0(BlockDriverState *bs, size_t size); +void *qemu_try_blockalign(BlockDriverState *bs, size_t size); +void *qemu_try_blockalign0(BlockDriverState *bs, size_t size); +bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); + +void bdrv_enable_copy_on_read(BlockDriverState *bs); +void bdrv_disable_copy_on_read(BlockDriverState *bs); + +void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event); + +#define BLKDBG_EVENT(child, evt) \ + do { \ + if (child) { \ + bdrv_debug_event(child->bs, evt); \ + } \ + } while (0) + +/** + * bdrv_get_aio_context: + * + * Returns: the currently bound #AioContext + */ +AioContext *bdrv_get_aio_context(BlockDriverState *bs); + +/** + * Move the current coroutine to the AioContext of @bs and return the old + * AioContext of the coroutine. Increase bs->in_flight so that draining @bs + * will wait for the operation to proceed until the corresponding + * bdrv_co_leave(). + * + * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as + * this will deadlock. + */ +AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs); + +/** + * Ends a section started by bdrv_co_enter(). Move the current coroutine back + * to old_ctx and decrease bs->in_flight again. 
+ */ +void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx); + +/** + * Transfer control to @co in the aio context of @bs + */ +void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co); + +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); + +void bdrv_io_plug(BlockDriverState *bs); +void bdrv_io_unplug(BlockDriverState *bs); + +bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, + uint32_t granularity, Error **errp); + +/** + * + * bdrv_co_copy_range: + * + * Do offloaded copy between two children. If the operation is not implemented + * by the driver, or if the backend storage doesn't support it, a negative + * error code will be returned. + * + * Note: block layer doesn't emulate or fallback to a bounce buffer approach + * because usually the caller shouldn't attempt offloaded copy any more (e.g. + * calling copy_file_range(2)) after the first error, thus it should fall back + * to a read+write path in the caller level. + * + * @src: Source child to copy data from + * @src_offset: offset in @src image to read data + * @dst: Destination child to copy data to + * @dst_offset: offset in @dst image to write data + * @bytes: number of bytes to copy + * @flags: request flags. Supported flags: + * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero + * write on @dst as if bdrv_co_pwrite_zeroes is + * called. Used to simplify caller code, or + * during BlockDriver.bdrv_co_copy_range_from() + * recursion. + * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping + * requests currently in flight. + * + * Returns: 0 if succeeded; negative error code if failed. 
+ **/ +int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, + BdrvChild *dst, int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + +/** + * bdrv_drained_end_no_poll: + * + * Same as bdrv_drained_end(), but do not poll for the subgraph to + * actually become unquiesced. Therefore, no graph changes will occur + * with this function. + * + * *drained_end_counter is incremented for every background operation + * that is scheduled, and will be decremented for every operation once + * it settles. The caller must poll until it reaches 0. The counter + * should be accessed using atomic operations only. + */ +void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); + + +/* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. + * + * More specifically, these functions use BDRV_POLL_WHILE(bs), which + * requires the caller to be either in the main thread and hold + * the BlockDriverState (bs) AioContext lock, or directly in the + * home thread that runs the bs AioContext. Calling them from + * another thread in another AioContext would cause deadlocks. + * + * Therefore, these functions are not proper I/O, because they + * can't run in *any* iothreads, but only in a specific one. + * + * These functions can call any function from I/O, Common and this + * category, but must be invoked only by other "I/O or GS" and GS APIs. + * + * All functions in this category must use the macro + * IO_OR_GS_CODE(); + * to catch when they are accidentally called by the wrong API. 
+ */ + +#define BDRV_POLL_WHILE(bs, cond) ({ \ + BlockDriverState *bs_ = (bs); \ + IO_OR_GS_CODE(); \ + AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \ + cond); }) + +void bdrv_drain(BlockDriverState *bs); +void coroutine_fn bdrv_co_drain(BlockDriverState *bs); + +int generated_co_wrapper +bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); + +int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix); + +/* Invalidate any cached metadata used by image formats */ +int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs, + Error **errp); +int generated_co_wrapper bdrv_flush(BlockDriverState *bs); +int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset, + int64_t bytes); +int generated_co_wrapper +bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); +int generated_co_wrapper +bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + +/** + * bdrv_parent_drained_begin_single: + * + * Begin a quiesced section for the parent of @c. If @poll is true, wait for + * any pending activity to cease. + */ +void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); + +/** + * bdrv_parent_drained_end_single: + * + * End a quiesced section for the parent of @c. + * + * This polls @bs's AioContext until all scheduled sub-drained_ends + * have settled, which may result in graph changes. + */ +void bdrv_parent_drained_end_single(BdrvChild *c); + +/** + * bdrv_drain_poll: + * + * Poll for pending requests in @bs, its parents (except for @ignore_parent), + * and if @recursive is true its children as well (used for subtree drain). + * + * If @ignore_bds_parents is true, parents that are BlockDriverStates must + * ignore the drain request because they will be drained separately (used for + * drain_all). + * + * This is part of bdrv_drained_begin. 
+ */ +bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent, bool ignore_bds_parents); + +/** + * bdrv_drained_begin: + * + * Begin a quiesced section for exclusive access to the BDS, by disabling + * external request sources including NBD server, block jobs, and device model. + * + * This function can be recursive. + */ +void bdrv_drained_begin(BlockDriverState *bs); + +/** + * bdrv_do_drained_begin_quiesce: + * + * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already + * running requests to complete. + */ +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + BdrvChild *parent, bool ignore_bds_parents); + +/** + * Like bdrv_drained_begin, but recursively begins a quiesced section for + * exclusive access to all child nodes as well. + */ +void bdrv_subtree_drained_begin(BlockDriverState *bs); + +/** + * bdrv_drained_end: + * + * End a quiescent section started by bdrv_drained_begin(). + * + * This polls @bs's AioContext until all scheduled sub-drained_ends + * have settled. On one hand, that may result in graph changes. On + * the other, this requires that the caller either runs in the main + * loop; or that all involved nodes (@bs and all of its parents) are + * in the caller's AioContext. + */ +void bdrv_drained_end(BlockDriverState *bs); + +/** + * End a quiescent section started by bdrv_subtree_drained_begin(). 
+ */ +void bdrv_subtree_drained_end(BlockDriverState *bs); + +#endif /* BLOCK_IO_H */ diff --git a/include/block/block.h b/include/block/block.h index e1713ee306..1e6b8fef1e 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -1,864 +1,32 @@ -#ifndef BLOCK_H -#define BLOCK_H - -#include "block/aio.h" -#include "block/aio-wait.h" -#include "qemu/iov.h" -#include "qemu/coroutine.h" -#include "block/accounting.h" -#include "block/dirty-bitmap.h" -#include "block/blockjob.h" -#include "qemu/hbitmap.h" -#include "qemu/transactions.h" - /* - * generated_co_wrapper - * - * Function specifier, which does nothing but mark functions to be - * generated by scripts/block-coroutine-wrapper.py - * - * Read more in docs/devel/block-coroutine-wrapper.rst - */ -#define generated_co_wrapper - -/* block.c */ -typedef struct BlockDriver BlockDriver; -typedef struct BdrvChild BdrvChild; -typedef struct BdrvChildClass BdrvChildClass; - -typedef struct BlockDriverInfo { - /* in bytes, 0 if irrelevant */ - int cluster_size; - /* offset at which the VM state can be saved (0 if not possible) */ - int64_t vm_state_offset; - bool is_dirty; - /* - * True if this block driver only supports compressed writes - */ - bool needs_compressed_writes; -} BlockDriverInfo; - -typedef struct BlockFragInfo { - uint64_t allocated_clusters; - uint64_t total_clusters; - uint64_t fragmented_clusters; - uint64_t compressed_clusters; -} BlockFragInfo; - -typedef enum { - BDRV_REQ_COPY_ON_READ = 0x1, - BDRV_REQ_ZERO_WRITE = 0x2, - - /* - * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate - * that the block driver should unmap (discard) blocks if it is guaranteed - * that the result will read back as zeroes. The flag is only passed to the - * driver if the block device is opened with BDRV_O_UNMAP. - */ - BDRV_REQ_MAY_UNMAP = 0x4, - - BDRV_REQ_FUA = 0x10, - BDRV_REQ_WRITE_COMPRESSED = 0x20, - - /* Signifies that this write request will not change the visible disk - * content. 
*/ - BDRV_REQ_WRITE_UNCHANGED = 0x40, - - /* Forces request serialisation. Use only with write requests. */ - BDRV_REQ_SERIALISING = 0x80, - - /* Execute the request only if the operation can be offloaded or otherwise - * be executed efficiently, but return an error instead of using a slow - * fallback. */ - BDRV_REQ_NO_FALLBACK = 0x100, - - /* - * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read - * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR - * filter is involved), in which case it signals that the COR operation - * need not read the data into memory (qiov) but only ensure they are - * copied to the top layer (i.e., that COR operation is done). - */ - BDRV_REQ_PREFETCH = 0x200, - - /* - * If we need to wait for other requests, just fail immediately. Used - * only together with BDRV_REQ_SERIALISING. - */ - BDRV_REQ_NO_WAIT = 0x400, - - /* Mask of valid flags */ - BDRV_REQ_MASK = 0x7ff, -} BdrvRequestFlags; - -typedef struct BlockSizes { - uint32_t phys; - uint32_t log; -} BlockSizes; - -typedef struct HDGeometry { - uint32_t heads; - uint32_t sectors; - uint32_t cylinders; -} HDGeometry; - -#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */ -#define BDRV_O_RDWR 0x0002 -#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ -#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ -#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ -#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ -#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */ -#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */ -#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */ -#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */ -#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */ -#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */ -#define 
BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */ -#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */ -#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given: - select an appropriate protocol driver, - ignoring the format layer */ -#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */ -#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening read-write fails */ -#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */ - -#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH) - - -/* Option names of options parsed by the block layer */ - -#define BDRV_OPT_CACHE_WB "cache.writeback" -#define BDRV_OPT_CACHE_DIRECT "cache.direct" -#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" -#define BDRV_OPT_READ_ONLY "read-only" -#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only" -#define BDRV_OPT_DISCARD "discard" -#define BDRV_OPT_FORCE_SHARE "force-share" - - -#define BDRV_SECTOR_BITS 9 -#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) - -#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \ - INT_MAX >> BDRV_SECTOR_BITS) -#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) - -/* - * We want allow aligning requests and disk length up to any 32bit alignment - * and don't afraid of overflow. - * To achieve it, and in the same time use some pretty number as maximum disk - * size, let's define maximum "length" (a limit for any offset/bytes request and - * for disk size) to be the greatest power of 2 less than INT64_MAX. - */ -#define BDRV_MAX_ALIGNMENT (1L << 30) -#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT)) - -/* - * Allocation status flags for bdrv_block_status() and friends. 
- * - * Public flags: - * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer - * BDRV_BLOCK_ZERO: offset reads as zero - * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data - * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this - * layer rather than any backing, set by block layer - * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this - * layer, set by block layer - * - * Internal flags: - * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request - * that the block layer recompute the answer from the returned - * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. - * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for - * zeroes in file child of current block node inside - * returned region. Only valid together with both - * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not - * appear with BDRV_BLOCK_ZERO. - * - * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the - * host offset within the returned BDS that is allocated for the - * corresponding raw guest data. 
However, whether that offset - * actually contains data also depends on BDRV_BLOCK_DATA, as follows: - * - * DATA ZERO OFFSET_VALID - * t t t sectors read as zero, returned file is zero at offset - * t f t sectors read as valid from file at offset - * f t t sectors preallocated, read as zero, returned file not - * necessarily zero at offset - * f f t sectors preallocated but read from backing_hd, - * returned file contains garbage at offset - * t t f sectors preallocated, read as zero, unknown offset - * t f f sectors read from unknown file or offset - * f t f not allocated or unknown offset, read as zero - * f f f not allocated or unknown offset, read from backing_hd - */ -#define BDRV_BLOCK_DATA 0x01 -#define BDRV_BLOCK_ZERO 0x02 -#define BDRV_BLOCK_OFFSET_VALID 0x04 -#define BDRV_BLOCK_RAW 0x08 -#define BDRV_BLOCK_ALLOCATED 0x10 -#define BDRV_BLOCK_EOF 0x20 -#define BDRV_BLOCK_RECURSE 0x40 - -typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; - -typedef struct BDRVReopenState { - BlockDriverState *bs; - int flags; - BlockdevDetectZeroesOptions detect_zeroes; - bool backing_missing; - BlockDriverState *old_backing_bs; /* keep pointer for permissions update */ - BlockDriverState *old_file_bs; /* keep pointer for permissions update */ - QDict *options; - QDict *explicit_options; - void *opaque; -} BDRVReopenState; - -/* - * Block operation types - */ -typedef enum BlockOpType { - BLOCK_OP_TYPE_BACKUP_SOURCE, - BLOCK_OP_TYPE_BACKUP_TARGET, - BLOCK_OP_TYPE_CHANGE, - BLOCK_OP_TYPE_COMMIT_SOURCE, - BLOCK_OP_TYPE_COMMIT_TARGET, - BLOCK_OP_TYPE_DATAPLANE, - BLOCK_OP_TYPE_DRIVE_DEL, - BLOCK_OP_TYPE_EJECT, - BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, - BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, - BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, - BLOCK_OP_TYPE_MIRROR_SOURCE, - BLOCK_OP_TYPE_MIRROR_TARGET, - BLOCK_OP_TYPE_RESIZE, - BLOCK_OP_TYPE_STREAM, - BLOCK_OP_TYPE_REPLACE, - BLOCK_OP_TYPE_MAX, -} BlockOpType; - -/* Block node permission constants */ -enum { - /** - * A 
user that has the "permission" of consistent reads is guaranteed that - * their view of the contents of the block device is complete and - * self-consistent, representing the contents of a disk at a specific - * point. - * - * For most block devices (including their backing files) this is true, but - * the property cannot be maintained in a few situations like for - * intermediate nodes of a commit block job. - */ - BLK_PERM_CONSISTENT_READ = 0x01, - - /** This permission is required to change the visible disk contents. */ - BLK_PERM_WRITE = 0x02, - - /** - * This permission (which is weaker than BLK_PERM_WRITE) is both enough and - * required for writes to the block node when the caller promises that - * the visible disk content doesn't change. - * - * As the BLK_PERM_WRITE permission is strictly stronger, either is - * sufficient to perform an unchanging write. - */ - BLK_PERM_WRITE_UNCHANGED = 0x04, - - /** This permission is required to change the size of a block node. */ - BLK_PERM_RESIZE = 0x08, - - /** - * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU - * 6.1 and earlier may still lock the corresponding byte in block/file-posix - * locking. So, implementing some new permission should be very careful to - * not interfere with this old unused thing. - */ - - BLK_PERM_ALL = 0x0f, - - DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ - | BLK_PERM_WRITE - | BLK_PERM_WRITE_UNCHANGED - | BLK_PERM_RESIZE, - - DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH, -}; - -/* - * Flags that parent nodes assign to child nodes to specify what kind of - * role(s) they take. - * - * At least one of DATA, METADATA, FILTERED, or COW must be set for - * every child. - */ -enum BdrvChildRoleBits { - /* - * This child stores data. - * Any node may have an arbitrary number of such children. - */ - BDRV_CHILD_DATA = (1 << 0), - - /* - * This child stores metadata. - * Any node may have an arbitrary number of metadata-storing - * children. 
- */ - BDRV_CHILD_METADATA = (1 << 1), - - /* - * A child that always presents exactly the same visible data as - * the parent, e.g. by virtue of the parent forwarding all reads - * and writes. - * This flag is mutually exclusive with DATA, METADATA, and COW. - * Any node may have at most one filtered child at a time. - */ - BDRV_CHILD_FILTERED = (1 << 2), - - /* - * Child from which to read all data that isn't allocated in the - * parent (i.e., the backing child); such data is copied to the - * parent through COW (and optionally COR). - * This field is mutually exclusive with DATA, METADATA, and - * FILTERED. - * Any node may have at most one such backing child at a time. - */ - BDRV_CHILD_COW = (1 << 3), - - /* - * The primary child. For most drivers, this is the child whose - * filename applies best to the parent node. - * Any node may have at most one primary child at a time. - */ - BDRV_CHILD_PRIMARY = (1 << 4), - - /* Useful combination of flags */ - BDRV_CHILD_IMAGE = BDRV_CHILD_DATA - | BDRV_CHILD_METADATA - | BDRV_CHILD_PRIMARY, -}; - -/* Mask of BdrvChildRoleBits values */ -typedef unsigned int BdrvChildRole; - -char *bdrv_perm_names(uint64_t perm); -uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm); - -void bdrv_init(void); -void bdrv_init_with_whitelist(void); -bool bdrv_uses_whitelist(void); -int bdrv_is_whitelisted(BlockDriver *drv, bool read_only); -BlockDriver *bdrv_find_protocol(const char *filename, - bool allow_protocol_prefix, - Error **errp); -BlockDriver *bdrv_find_format(const char *format_name); -int bdrv_create(BlockDriver *drv, const char* filename, - QemuOpts *opts, Error **errp); -int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); - -BlockDriverState *bdrv_new(void); -int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - Error **errp); -int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, - Error **errp); -int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState 
*new_bs, - Error **errp); -BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, - int flags, Error **errp); -int bdrv_drop_filter(BlockDriverState *bs, Error **errp); - -int bdrv_parse_aio(const char *mode, int *flags); -int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough); -int bdrv_parse_discard_flags(const char *mode, int *flags); -BdrvChild *bdrv_open_child(const char *filename, - QDict *options, const char *bdref_key, - BlockDriverState* parent, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - bool allow_none, Error **errp); -BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); -int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp); -int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, - const char *bdref_key, Error **errp); -BlockDriverState *bdrv_open(const char *filename, const char *reference, - QDict *options, int flags, Error **errp); -BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, - const char *node_name, - QDict *options, int flags, - Error **errp); -BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, - int flags, Error **errp); -BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, - BlockDriverState *bs, QDict *options, - bool keep_old_opts); -void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue); -int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); -int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - Error **errp); -int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, - Error **errp); -int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, - int64_t bytes, BdrvRequestFlags flags); -int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); -int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes); -int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, - int64_t 
bytes); -int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, - const void *buf, int64_t bytes); -/* - * Efficiently zero a region of the disk image. Note that this is a regular - * I/O request like read or write and should have a reasonable size. This - * function is not suitable for zeroing the entire image in a single request - * because it may allocate memory for the entire region. - */ -int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, - int64_t bytes, BdrvRequestFlags flags); -BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, - const char *backing_file); -void bdrv_refresh_filename(BlockDriverState *bs); - -int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, - PreallocMode prealloc, BdrvRequestFlags flags, - Error **errp); -int generated_co_wrapper -bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, - PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); - -int64_t bdrv_nb_sectors(BlockDriverState *bs); -int64_t bdrv_getlength(BlockDriverState *bs); -int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); -BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, - BlockDriverState *in_bs, Error **errp); -void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); -void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp); -int bdrv_commit(BlockDriverState *bs); -int bdrv_make_empty(BdrvChild *c, Error **errp); -int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, - const char *backing_fmt, bool warn); -void bdrv_register(BlockDriver *bdrv); -int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - const char *backing_file_str); -BlockDriverState *bdrv_find_overlay(BlockDriverState *active, - BlockDriverState *bs); -BlockDriverState *bdrv_find_base(BlockDriverState *bs); -bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, - Error **errp); -int 
bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, - Error **errp); -void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base); -int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp); -void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs); - - -typedef struct BdrvCheckResult { - int corruptions; - int leaks; - int check_errors; - int corruptions_fixed; - int leaks_fixed; - int64_t image_end_offset; - BlockFragInfo bfi; -} BdrvCheckResult; - -typedef enum { - BDRV_FIX_LEAKS = 1, - BDRV_FIX_ERRORS = 2, -} BdrvCheckMode; - -int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, - BdrvCheckMode fix); - -/* The units of offset and total_work_size may be chosen arbitrarily by the - * block driver; total_work_size may change during the course of the amendment - * operation */ -typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset, - int64_t total_work_size, void *opaque); -int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb, void *cb_opaque, - bool force, - Error **errp); - -/* check if a named node can be replaced when doing drive-mirror */ -BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, - const char *node_name, Error **errp); - -/* async block I/O */ -void bdrv_aio_cancel(BlockAIOCB *acb); -void bdrv_aio_cancel_async(BlockAIOCB *acb); - -/* sg packet commands */ -int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf); - -/* Invalidate any cached metadata used by image formats */ -int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs, - Error **errp); -void bdrv_invalidate_cache_all(Error **errp); -int bdrv_inactivate_all(void); - -/* Ensure contents are flushed to disk. 
*/ -int generated_co_wrapper bdrv_flush(BlockDriverState *bs); -int coroutine_fn bdrv_co_flush(BlockDriverState *bs); -int bdrv_flush_all(void); -void bdrv_close_all(void); -void bdrv_drain(BlockDriverState *bs); -void coroutine_fn bdrv_co_drain(BlockDriverState *bs); -void bdrv_drain_all_begin(void); -void bdrv_drain_all_end(void); -void bdrv_drain_all(void); - -#define BDRV_POLL_WHILE(bs, cond) ({ \ - BlockDriverState *bs_ = (bs); \ - AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \ - cond); }) - -int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset, - int64_t bytes); -int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); -int bdrv_has_zero_init_1(BlockDriverState *bs); -int bdrv_has_zero_init(BlockDriverState *bs); -bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); -int bdrv_block_status(BlockDriverState *bs, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file); -int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, - int64_t offset, int64_t bytes, int64_t *pnum, - int64_t *map, BlockDriverState **file); -int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, - int64_t *pnum); -int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, - bool include_base, int64_t offset, int64_t bytes, - int64_t *pnum); -int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, - int64_t bytes); - -bool bdrv_is_read_only(BlockDriverState *bs); -int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, - bool ignore_allow_rdw, Error **errp); -int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, - Error **errp); -bool bdrv_is_writable(BlockDriverState *bs); -bool bdrv_is_sg(BlockDriverState *bs); -bool bdrv_is_inserted(BlockDriverState *bs); -void bdrv_lock_medium(BlockDriverState *bs, bool locked); -void bdrv_eject(BlockDriverState *bs, bool eject_flag); -const char *bdrv_get_format_name(BlockDriverState 
*bs); -BlockDriverState *bdrv_find_node(const char *node_name); -BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp); -XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp); -BlockDriverState *bdrv_lookup_bs(const char *device, - const char *node_name, - Error **errp); -bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base); -BlockDriverState *bdrv_next_node(BlockDriverState *bs); -BlockDriverState *bdrv_next_all_states(BlockDriverState *bs); - -typedef struct BdrvNextIterator { - enum { - BDRV_NEXT_BACKEND_ROOTS, - BDRV_NEXT_MONITOR_OWNED, - } phase; - BlockBackend *blk; - BlockDriverState *bs; -} BdrvNextIterator; - -BlockDriverState *bdrv_first(BdrvNextIterator *it); -BlockDriverState *bdrv_next(BdrvNextIterator *it); -void bdrv_next_cleanup(BdrvNextIterator *it); - -BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs); -bool bdrv_supports_compressed_writes(BlockDriverState *bs); -void bdrv_iterate_format(void (*it)(void *opaque, const char *name), - void *opaque, bool read_only); -const char *bdrv_get_node_name(const BlockDriverState *bs); -const char *bdrv_get_device_name(const BlockDriverState *bs); -const char *bdrv_get_device_or_node_name(const BlockDriverState *bs); -int bdrv_get_flags(BlockDriverState *bs); -int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); -ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, - Error **errp); -BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs); -void bdrv_round_to_clusters(BlockDriverState *bs, - int64_t offset, int64_t bytes, - int64_t *cluster_offset, - int64_t *cluster_bytes); - -void bdrv_get_backing_filename(BlockDriverState *bs, - char *filename, int filename_size); -char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp); -char *bdrv_get_full_backing_filename_from_filename(const char *backed, - const char *backing, - Error **errp); -char *bdrv_dirname(BlockDriverState *bs, Error **errp); - -int 
path_has_protocol(const char *path); -int path_is_absolute(const char *path); -char *path_combine(const char *base_path, const char *filename); - -int generated_co_wrapper -bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); -int generated_co_wrapper -bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); -int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, - int64_t pos, int size); - -int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, - int64_t pos, int size); - -void bdrv_img_create(const char *filename, const char *fmt, - const char *base_filename, const char *base_fmt, - char *options, uint64_t img_size, int flags, - bool quiet, Error **errp); - -/* Returns the alignment in bytes that is required so that no bounce buffer - * is required throughout the stack */ -size_t bdrv_min_mem_align(BlockDriverState *bs); -/* Returns optimal alignment in bytes for bounce buffer */ -size_t bdrv_opt_mem_align(BlockDriverState *bs); -void *qemu_blockalign(BlockDriverState *bs, size_t size); -void *qemu_blockalign0(BlockDriverState *bs, size_t size); -void *qemu_try_blockalign(BlockDriverState *bs, size_t size); -void *qemu_try_blockalign0(BlockDriverState *bs, size_t size); -bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); - -void bdrv_enable_copy_on_read(BlockDriverState *bs); -void bdrv_disable_copy_on_read(BlockDriverState *bs); - -void bdrv_ref(BlockDriverState *bs); -void bdrv_unref(BlockDriverState *bs); -void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); -BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - BlockDriverState *child_bs, - const char *child_name, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - Error **errp); - -bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); -void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); -void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason); 
-void bdrv_op_block_all(BlockDriverState *bs, Error *reason); -void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason); -bool bdrv_op_blocker_is_empty(BlockDriverState *bs); - -#define BLKDBG_EVENT(child, evt) \ - do { \ - if (child) { \ - bdrv_debug_event(child->bs, evt); \ - } \ - } while (0) - -void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event); - -int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, - const char *tag); -int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag); -int bdrv_debug_resume(BlockDriverState *bs, const char *tag); -bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); - -/** - * bdrv_get_aio_context: + * QEMU System Emulator block driver * - * Returns: the currently bound #AioContext - */ -AioContext *bdrv_get_aio_context(BlockDriverState *bs); - -/** - * Move the current coroutine to the AioContext of @bs and return the old - * AioContext of the coroutine. Increase bs->in_flight so that draining @bs - * will wait for the operation to proceed until the corresponding - * bdrv_co_leave(). + * Copyright (c) 2003 Fabrice Bellard * - * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as - * this will deadlock. - */ -AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs); - -/** - * Ends a section started by bdrv_co_enter(). Move the current coroutine back - * to old_ctx and decrease bs->in_flight again. - */ -void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx); - -/** - * Locks the AioContext of @bs if it's not the current AioContext. This avoids - * double locking which could lead to deadlocks: This is a coroutine_fn, so we - * know we already own the lock of the current AioContext. 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * - * May only be called in the main thread. - */ -void coroutine_fn bdrv_co_lock(BlockDriverState *bs); - -/** - * Unlocks the AioContext of @bs if it's not the current AioContext. - */ -void coroutine_fn bdrv_co_unlock(BlockDriverState *bs); - -/** - * Transfer control to @co in the aio context of @bs - */ -void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co); - -void bdrv_set_aio_context_ignore(BlockDriverState *bs, - AioContext *new_context, GSList **ignore); -int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, - Error **errp); -int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, - BdrvChild *ignore_child, Error **errp); -bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, - GSList **ignore, Error **errp); -bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, - GSList **ignore, Error **errp); -AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c); -AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); - -int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); -int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); - -void bdrv_io_plug(BlockDriverState *bs); -void bdrv_io_unplug(BlockDriverState *bs); - -/** - * bdrv_parent_drained_begin_single: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. * - * Begin a quiesced section for the parent of @c. If @poll is true, wait for - * any pending activity to cease. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. */ -void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); - -/** - * bdrv_parent_drained_end_single: - * - * End a quiesced section for the parent of @c. - * - * This polls @bs's AioContext until all scheduled sub-drained_ends - * have settled, which may result in graph changes. - */ -void bdrv_parent_drained_end_single(BdrvChild *c); - -/** - * bdrv_drain_poll: - * - * Poll for pending requests in @bs, its parents (except for @ignore_parent), - * and if @recursive is true its children as well (used for subtree drain). - * - * If @ignore_bds_parents is true, parents that are BlockDriverStates must - * ignore the drain request because they will be drained separately (used for - * drain_all). - * - * This is part of bdrv_drained_begin. - */ -bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent, bool ignore_bds_parents); - -/** - * bdrv_drained_begin: - * - * Begin a quiesced section for exclusive access to the BDS, by disabling - * external request sources including NBD server, block jobs, and device model. - * - * This function can be recursive. - */ -void bdrv_drained_begin(BlockDriverState *bs); - -/** - * bdrv_do_drained_begin_quiesce: - * - * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already - * running requests to complete. 
- */ -void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - BdrvChild *parent, bool ignore_bds_parents); - -/** - * Like bdrv_drained_begin, but recursively begins a quiesced section for - * exclusive access to all child nodes as well. - */ -void bdrv_subtree_drained_begin(BlockDriverState *bs); - -/** - * bdrv_drained_end: - * - * End a quiescent section started by bdrv_drained_begin(). - * - * This polls @bs's AioContext until all scheduled sub-drained_ends - * have settled. On one hand, that may result in graph changes. On - * the other, this requires that the caller either runs in the main - * loop; or that all involved nodes (@bs and all of its parents) are - * in the caller's AioContext. - */ -void bdrv_drained_end(BlockDriverState *bs); - -/** - * bdrv_drained_end_no_poll: - * - * Same as bdrv_drained_end(), but do not poll for the subgraph to - * actually become unquiesced. Therefore, no graph changes will occur - * with this function. - * - * *drained_end_counter is incremented for every background operation - * that is scheduled, and will be decremented for every operation once - * it settles. The caller must poll until it reaches 0. The counter - * should be accessed using atomic operations only. - */ -void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); - -/** - * End a quiescent section started by bdrv_subtree_drained_begin(). - */ -void bdrv_subtree_drained_end(BlockDriverState *bs); - -void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child, - Error **errp); -void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp); - -bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, - uint32_t granularity, Error **errp); -/** - * - * bdrv_register_buf/bdrv_unregister_buf: - * - * Register/unregister a buffer for I/O. 
For example, VFIO drivers are - * interested to know the memory areas that would later be used for I/O, so - * that they can prepare IOMMU mapping etc., to get better performance. - */ -void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size); -void bdrv_unregister_buf(BlockDriverState *bs, void *host); +#ifndef BLOCK_H +#define BLOCK_H -/** - * - * bdrv_co_copy_range: - * - * Do offloaded copy between two children. If the operation is not implemented - * by the driver, or if the backend storage doesn't support it, a negative - * error code will be returned. - * - * Note: block layer doesn't emulate or fallback to a bounce buffer approach - * because usually the caller shouldn't attempt offloaded copy any more (e.g. - * calling copy_file_range(2)) after the first error, thus it should fall back - * to a read+write path in the caller level. - * - * @src: Source child to copy data from - * @src_offset: offset in @src image to read data - * @dst: Destination child to copy data to - * @dst_offset: offset in @dst image to write data - * @bytes: number of bytes to copy - * @flags: request flags. Supported flags: - * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero - * write on @dst as if bdrv_co_pwrite_zeroes is - * called. Used to simplify caller code, or - * during BlockDriver.bdrv_co_copy_range_from() - * recursion. - * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping - * requests currently in flight. - * - * Returns: 0 if succeeded; negative error code if failed. - **/ -int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, - BdrvChild *dst, int64_t dst_offset, - int64_t bytes, BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); +#include "block-global-state.h" +#include "block-io.h" -void bdrv_cancel_in_flight(BlockDriverState *bs); +/* DO NOT ADD ANYTHING IN HERE. 
USE ONE OF THE HEADERS INCLUDED ABOVE */ -#endif +#endif /* BLOCK_H */ diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h new file mode 100644 index 0000000000..8947abab76 --- /dev/null +++ b/include/block/block_int-common.h @@ -0,0 +1,1246 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef BLOCK_INT_COMMON_H +#define BLOCK_INT_COMMON_H + +#include "block/accounting.h" +#include "block/block.h" +#include "block/aio-wait.h" +#include "qemu/queue.h" +#include "qemu/coroutine.h" +#include "qemu/stats64.h" +#include "qemu/timer.h" +#include "qemu/hbitmap.h" +#include "block/snapshot.h" +#include "qemu/throttle.h" +#include "qemu/rcu.h" + +#define BLOCK_FLAG_LAZY_REFCOUNTS 8 + +#define BLOCK_OPT_SIZE "size" +#define BLOCK_OPT_ENCRYPT "encryption" +#define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format" +#define BLOCK_OPT_COMPAT6 "compat6" +#define BLOCK_OPT_HWVERSION "hwversion" +#define BLOCK_OPT_BACKING_FILE "backing_file" +#define BLOCK_OPT_BACKING_FMT "backing_fmt" +#define BLOCK_OPT_CLUSTER_SIZE "cluster_size" +#define BLOCK_OPT_TABLE_SIZE "table_size" +#define BLOCK_OPT_PREALLOC "preallocation" +#define BLOCK_OPT_SUBFMT "subformat" +#define BLOCK_OPT_COMPAT_LEVEL "compat" +#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" +#define BLOCK_OPT_ADAPTER_TYPE "adapter_type" +#define BLOCK_OPT_REDUNDANCY "redundancy" +#define BLOCK_OPT_NOCOW "nocow" +#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint" +#define BLOCK_OPT_OBJECT_SIZE "object_size" +#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits" +#define BLOCK_OPT_DATA_FILE "data_file" +#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw" +#define BLOCK_OPT_COMPRESSION_TYPE "compression_type" +#define BLOCK_OPT_EXTL2 "extended_l2" + +#define BLOCK_PROBE_BUF_SIZE 512 + +enum BdrvTrackedRequestType { + BDRV_TRACKED_READ, + BDRV_TRACKED_WRITE, + BDRV_TRACKED_DISCARD, + BDRV_TRACKED_TRUNCATE, +}; + +/* + * That is not quite good that BdrvTrackedRequest structure is public, + * as block/io.c is very careful about incoming offset/bytes being + * correct. 
Be sure to assert bdrv_check_request() succeeded after any + * modification of BdrvTrackedRequest object out of block/io.c + */ +typedef struct BdrvTrackedRequest { + BlockDriverState *bs; + int64_t offset; + int64_t bytes; + enum BdrvTrackedRequestType type; + + bool serialising; + int64_t overlap_offset; + int64_t overlap_bytes; + + QLIST_ENTRY(BdrvTrackedRequest) list; + Coroutine *co; /* owner, used for deadlock detection */ + CoQueue wait_queue; /* coroutines blocked on this request */ + + struct BdrvTrackedRequest *waiting_for; +} BdrvTrackedRequest; + + +struct BlockDriver { + /* + * These fields are initialized when this object is created, + * and are never changed afterwards. + */ + + const char *format_name; + int instance_size; + + /* + * Set to true if the BlockDriver is a block filter. Block filters pass + * certain callbacks that refer to data (see block.c) to their bs->file + * or bs->backing (whichever one exists) if the driver doesn't implement + * them. Drivers that do not wish to forward must implement them and return + * -ENOTSUP. + * Note that filters are not allowed to modify data. + * + * Filters generally cannot have more than a single filtered child, + * because the data they present must at all times be the same as + * that on their filtered child. That would be impossible to + * achieve for multiple filtered children. + * (And this filtered child must then be bs->file or bs->backing.) + */ + bool is_filter; + /* + * Set to true if the BlockDriver is a format driver. Format nodes + * generally do not expect their children to be other format nodes + * (except for backing files), and so format probing is disabled + * on those children. + */ + bool is_format; + + /* + * Drivers not implementing bdrv_parse_filename nor bdrv_open should have + * this field set to true, except ones that are defined only by their + * child's bs. + * An example of the last type will be the quorum block driver. 
+ */ + bool bdrv_needs_filename; + + /* + * Set if a driver can support backing files. This also implies the + * following semantics: + * + * - Return status 0 of .bdrv_co_block_status means that corresponding + * blocks are not allocated in this layer of backing-chain + * - For such (unallocated) blocks, read will: + * - fill buffer with zeros if there is no backing file + * - read from the backing file otherwise, where the block layer + * takes care of reading zeros beyond EOF if backing file is short + */ + bool supports_backing; + + bool has_variable_length; + + /* + * Drivers setting this field must be able to work with just a plain + * filename with '<protocol_name>:' as a prefix, and no other options. + * Options may be extracted from the filename by implementing + * bdrv_parse_filename. + */ + const char *protocol_name; + + /* List of options for creating images, terminated by name == NULL */ + QemuOptsList *create_opts; + + /* List of options for image amend */ + QemuOptsList *amend_opts; + + /* + * If this driver supports reopening images this contains a + * NULL-terminated list of the runtime options that can be + * modified. If an option in this list is unspecified during + * reopen then it _must_ be reset to its default value or return + * an error. + */ + const char *const *mutable_opts; + + /* + * Pointer to a NULL-terminated array of names of strong options + * that can be specified for bdrv_open(). A strong option is one + * that changes the data of a BDS. + * If this pointer is NULL, the array is considered empty. + * "filename" and "driver" are always considered strong. + */ + const char *const *strong_runtime_opts; + + + /* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. 
+ */ + + /* + * This function is invoked under BQL before .bdrv_co_amend() + * (which in contrast does not necessarily run under the BQL) + * to allow driver-specific initialization code that requires + * the BQL, like setting up specific permission flags. + */ + int (*bdrv_amend_pre_run)(BlockDriverState *bs, Error **errp); + /* + * This function is invoked under BQL after .bdrv_co_amend() + * to allow cleaning up what was done in .bdrv_amend_pre_run(). + */ + void (*bdrv_amend_clean)(BlockDriverState *bs); + + /* + * Return true if @to_replace can be replaced by a BDS with the + * same data as @bs without it affecting @bs's behavior (that is, + * without it being visible to @bs's parents). + */ + bool (*bdrv_recurse_can_replace)(BlockDriverState *bs, + BlockDriverState *to_replace); + + int (*bdrv_probe_device)(const char *filename); + + /* + * Any driver implementing this callback is expected to be able to handle + * NULL file names in its .bdrv_open() implementation. + */ + void (*bdrv_parse_filename)(const char *filename, QDict *options, + Error **errp); + + /* For handling image reopen for split or non-split files. 
*/ + int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); + void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + void (*bdrv_join_options)(QDict *options, QDict *old_options); + + int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags, + Error **errp); + + /* Protocol drivers should implement this instead of bdrv_open */ + int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, + Error **errp); + void (*bdrv_close)(BlockDriverState *bs); + + int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, + Error **errp); + int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp); + + int (*bdrv_amend_options)(BlockDriverState *bs, + QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, + bool force, + Error **errp); + + int (*bdrv_make_empty)(BlockDriverState *bs); + + /* + * Refreshes the bs->exact_filename field. If that is impossible, + * bs->exact_filename has to be left empty. + */ + void (*bdrv_refresh_filename)(BlockDriverState *bs); + + /* + * Gathers the open options for all children into @target. + * A simple format driver (without backing file support) might + * implement this function like this: + * + * QINCREF(bs->file->bs->full_open_options); + * qdict_put(target, "file", bs->file->bs->full_open_options); + * + * If not specified, the generic implementation will simply put + * all children's options under their respective name. + * + * @backing_overridden is true when bs->backing seems not to be + * the child that would result from opening bs->backing_file. + * Therefore, if it is true, the backing child's options should be + * gathered; otherwise, there is no need since the backing child + * is the one implied by the image header. 
+ * + * Note that ideally this function would not be needed. Every + * block driver which implements it is probably doing something + * shady regarding its runtime option structure. + */ + void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target, + bool backing_overridden); + + /* + * Returns an allocated string which is the directory name of this BDS: It + * will be used to make relative filenames absolute by prepending this + * function's return value to them. + */ + char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp); + + /* + * This informs the driver that we are no longer interested in the result + * of in-flight requests, so don't waste the time if possible. + * + * One example usage is to avoid waiting for an nbd target node reconnect + * timeout during job-cancel with force=true. + */ + void (*bdrv_cancel_in_flight)(BlockDriverState *bs); + + int (*bdrv_inactivate)(BlockDriverState *bs); + + int (*bdrv_snapshot_create)(BlockDriverState *bs, + QEMUSnapshotInfo *sn_info); + int (*bdrv_snapshot_goto)(BlockDriverState *bs, + const char *snapshot_id); + int (*bdrv_snapshot_delete)(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); + int (*bdrv_snapshot_list)(BlockDriverState *bs, + QEMUSnapshotInfo **psn_info); + int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); + + int (*bdrv_change_backing_file)(BlockDriverState *bs, + const char *backing_file, const char *backing_fmt); + + /* TODO Better pass a option string/QDict/QemuOpts to add any rule? 
*/ + int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, + const char *tag); + int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs, + const char *tag); + int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); + bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); + + void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp); + + /* + * Returns 1 if newly created images are guaranteed to contain only + * zeros, 0 otherwise. + */ + int (*bdrv_has_zero_init)(BlockDriverState *bs); + + /* + * Remove fd handlers, timers, and other event loop callbacks so the event + * loop is no longer in use. Called with no in-flight requests and in + * depth-first traversal order with parents before child nodes. + */ + void (*bdrv_detach_aio_context)(BlockDriverState *bs); + + /* + * Add fd handlers, timers, and other event loop callbacks so I/O requests + * can be processed again. Called with no in-flight requests and in + * depth-first traversal order with child nodes before parent nodes. + */ + void (*bdrv_attach_aio_context)(BlockDriverState *bs, + AioContext *new_context); + + /** + * Try to get @bs's logical and physical block size. + * On success, store them in @bsz and return zero. + * On failure, return negative errno. + */ + int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz); + /** + * Try to get @bs's geometry (cyls, heads, sectors) + * On success, store them in @geo and return 0. + * On failure return -errno. + * Only drivers that want to override guest geometry implement this + * callback; see hd_geometry_guess(). + */ + int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); + + void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child, + Error **errp); + void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, + Error **errp); + + /** + * Informs the block driver that a permission change is intended. 
The + * driver checks whether the change is permissible and may take other + * preparations for the change (e.g. get file system locks). This operation + * is always followed by a call to either .bdrv_set_perm or + * .bdrv_abort_perm_update. + * + * Checks whether the requested set of cumulative permissions in @perm + * can be granted for accessing @bs and whether no other users are using + * permissions other than those given in @shared (both arguments take + * BLK_PERM_* bitmasks). + * + * If both conditions are met, 0 is returned. Otherwise, -errno is returned + * and errp is set to an error describing the conflict. + */ + int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, + uint64_t shared, Error **errp); + + /** + * Called to inform the driver that the cumulative set of used + * permissions for @bs has changed to @perm, and the set of sharable + * permission to @shared. The driver can use this to propagate changes to + * its children (i.e. request permissions only if a parent actually needs + * them). + * + * This function is only invoked after bdrv_check_perm(), so block drivers + * may rely on preparations made in their .bdrv_check_perm implementation. + */ + void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared); + + /* + * Called to inform the driver that after a previous bdrv_check_perm() + * call, the permission update is not performed and any preparations made + * for it (e.g. taken file locks) need to be undone. + * + * This function can be called even for nodes that never saw a + * bdrv_check_perm() call. It is a no-op then. + */ + void (*bdrv_abort_perm_update)(BlockDriverState *bs); + + /** + * Returns in @nperm and @nshared the permissions that the driver for @bs + * needs on its child @c, based on the cumulative permissions requested by + * the parents in @parent_perm and @parent_shared. + * + * If @c is NULL, return the permissions for attaching a new child for the + * given @child_class and @role.
+ * + * If @reopen_queue is non-NULL, don't return the currently needed + * permissions, but those that will be needed after applying the + * @reopen_queue. + */ + void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared); + + /** + * Register/unregister a buffer for I/O. For example, when the driver is + * interested to know the memory areas that will later be used in iovs, so + * that it can do IOMMU mapping with VFIO etc., in order to get better + * performance. In the case of VFIO drivers, this callback is used to do + * DMA mapping for hot buffers. + */ + void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size); + void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host); + + /* + * This field is modified only under the BQL, and is part of + * the global state. + */ + QLIST_ENTRY(BlockDriver) list; + + /* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. 
+ */ + + int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); + + int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs, + BlockdevAmendOptions *opts, + bool force, + Error **errp); + + /* aio */ + BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); + BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); + BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, + BlockCompletionFunc *cb, void *opaque); + BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs, + int64_t offset, int bytes, + BlockCompletionFunc *cb, void *opaque); + + int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + + /** + * @offset: position in bytes to read at + * @bytes: number of bytes to read + * @qiov: the buffers to fill with read data + * @flags: currently unused, always 0 + * + * @offset and @bytes will be a multiple of 'request_alignment', + * but the length of individual @qiov elements does not have to + * be a multiple. + * + * @bytes will always equal the total size of @qiov, and will be + * no larger than 'max_transfer'. + * + * The buffer in @qiov may point directly to guest memory. 
+ */ + int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); + + int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); + + int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, + int flags); + /** + * @offset: position in bytes to write at + * @bytes: number of bytes to write + * @qiov: the buffers containing data to write + * @flags: zero or more bits allowed by 'supported_write_flags' + * + * @offset and @bytes will be a multiple of 'request_alignment', + * but the length of individual @qiov elements does not have to + * be a multiple. + * + * @bytes will always equal the total size of @qiov, and will be + * no larger than 'max_transfer'. + * + * The buffer in @qiov may point directly to guest memory. + */ + int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); + int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); + + /* + * Efficiently zero a region of the disk image. Typically an image format + * would use a compact metadata representation to implement this. This + * function pointer may be NULL or return -ENOTSUP and .bdrv_co_writev() + * will be called instead. + */ + int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs, + int64_t offset, int64_t bytes, BdrvRequestFlags flags); + int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, + int64_t offset, int64_t bytes); + + /* + * Map [offset, offset + nbytes) range onto a child of @bs to copy from, + * and invoke bdrv_co_copy_range_from(child, ...), or invoke + * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
+ * + * See the comment of bdrv_co_copy_range for the parameter and return value + * semantics. + */ + int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs, + BdrvChild *src, + int64_t offset, + BdrvChild *dst, + int64_t dst_offset, + int64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + + /* + * Map [offset, offset + nbytes) range onto a child of bs to copy data to, + * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy + * operation if @bs is the leaf and @src has the same BlockDriver. Return + * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver. + * + * See the comment of bdrv_co_copy_range for the parameter and return value + * semantics. + */ + int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs, + BdrvChild *src, + int64_t src_offset, + BdrvChild *dst, + int64_t dst_offset, + int64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + + /* + * Building block for bdrv_block_status[_above] and + * bdrv_is_allocated[_above]. The driver should answer only + * according to the current layer, and should only need to set + * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, + * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing + * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See + * block.h for the overall meaning of the bits. As a hint, the + * flag want_zero is true if the caller cares more about precise + * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for + * overall allocation (favor larger *pnum, perhaps by reporting + * _DATA instead of _ZERO). The block layer guarantees input + * clamped to bdrv_getlength() and aligned to request_alignment, + * as well as non-NULL pnum, map, and file; in turn, the driver + * must return an error or set pnum to an aligned non-zero value. + * + * Note that @bytes is just a hint on how big of a region the + * caller wants to inspect. It is not a limit on *pnum. 
+ * Implementations are free to return larger values of *pnum if + * doing so does not incur a performance penalty. + * + * block/io.c's bdrv_co_block_status() will utilize an unclamped + * *pnum value for the block-status cache on protocol nodes, prior + * to clamping *pnum for return to its caller. + */ + int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, + bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); + + /* + * Snapshot-access API. + * + * Block-driver may provide snapshot-access API: special functions to access + * some internal "snapshot". The functions are similar with normal + * read/block_status/discard handler, but don't have any specific handling + * in generic block-layer: no serializing, no alignment, no tracked + * requests. So, block-driver that realizes these APIs is fully responsible + * for synchronization between snapshot-access API and normal IO requests. + * + * TODO: To be able to support qcow2's internal snapshots, this API will + * need to be extended to: + * - be able to select a specific snapshot + * - receive the snapshot's actual length (which may differ from bs's + * length) + */ + int coroutine_fn (*bdrv_co_preadv_snapshot)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset); + int coroutine_fn (*bdrv_co_snapshot_block_status)(BlockDriverState *bs, + bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); + int coroutine_fn (*bdrv_co_pdiscard_snapshot)(BlockDriverState *bs, + int64_t offset, int64_t bytes); + + /* + * Invalidate any cached meta-data. + */ + void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs, + Error **errp); + + /* + * Flushes all data for all layers by calling bdrv_co_flush for underlying + * layers, if needed. This function is needed for deterministic + * synchronization of the flush finishing callback. 
+ */ + int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); + + /* Delete a created file. */ + int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs, + Error **errp); + + /* + * Flushes all data that was already written to the OS all the way down to + * the disk (for example file-posix.c calls fsync()). + */ + int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs); + + /* + * Flushes all internal caches to the OS. The data may still sit in a + * writeback cache of the host OS, but it will survive a crash of the qemu + * process. + */ + int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs); + + /* + * Truncate @bs to @offset bytes using the given @prealloc mode + * when growing. Modes other than PREALLOC_MODE_OFF should be + * rejected when shrinking @bs. + * + * If @exact is true, @bs must be resized to exactly @offset. + * Otherwise, it is sufficient for @bs (if it is a host block + * device and thus there is no way to resize it) to be at least + * @offset bytes in length. + * + * If @exact is true and this function fails but would succeed + * with @exact = false, it should return -ENOTSUP. 
+ */ + int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp); + int64_t (*bdrv_getlength)(BlockDriverState *bs); + int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs); + BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs, + Error **errp); + + int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov); + int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset); + + int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); + + ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs, + Error **errp); + BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs); + + int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, + QEMUIOVector *qiov, + int64_t pos); + int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, + QEMUIOVector *qiov, + int64_t pos); + + /* removable device specific */ + bool (*bdrv_is_inserted)(BlockDriverState *bs); + void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); + void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); + + /* to control generic scsi devices */ + BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, + unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); + int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs, + unsigned long int req, void *buf); + + /* + * Returns 0 for completed check, -errno for internal errors. + * The check results are stored in result. 
+ */ + int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs, + BdrvCheckResult *result, + BdrvCheckMode fix); + + void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event); + + /* io queue for linux-aio */ + void (*bdrv_io_plug)(BlockDriverState *bs); + void (*bdrv_io_unplug)(BlockDriverState *bs); + + /** + * bdrv_co_drain_begin is called if implemented in the beginning of a + * drain operation to drain and stop any internal sources of requests in + * the driver. + * bdrv_co_drain_end is called if implemented at the end of the drain. + * + * They should be used by the driver to e.g. manage scheduled I/O + * requests, or toggle an internal state. After the end of the drain new + * requests will continue normally. + */ + void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); + void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); + + bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); + bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp); + int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs, + const char *name, + Error **errp); +}; + +static inline bool block_driver_can_compress(BlockDriver *drv) +{ + return drv->bdrv_co_pwritev_compressed || + drv->bdrv_co_pwritev_compressed_part; +} + +typedef struct BlockLimits { + /* + * Alignment requirement, in bytes, for offset/length of I/O + * requests. Must be a power of 2 less than INT_MAX; defaults to + * 1 for drivers with modern byte interfaces, and to 512 + * otherwise. + */ + uint32_t request_alignment; + + /* + * Maximum number of bytes that can be discarded at once. Must be multiple + * of pdiscard_alignment, but need not be power of 2. May be 0 if no + * inherent 64-bit limit. + */ + int64_t max_pdiscard; + + /* + * Optimal alignment for discard requests in bytes. A power of 2 + * is best but not mandatory. 
Must be a multiple of + * bl.request_alignment, and must be less than max_pdiscard if + * that is set. May be 0 if bl.request_alignment is good enough + */ + uint32_t pdiscard_alignment; + + /* + * Maximum number of bytes that can be zeroized at once. Must be multiple of + * pwrite_zeroes_alignment. 0 means no limit. + */ + int64_t max_pwrite_zeroes; + + /* + * Optimal alignment for write zeroes requests in bytes. A power + * of 2 is best but not mandatory. Must be a multiple of + * bl.request_alignment, and must be less than max_pwrite_zeroes + * if that is set. May be 0 if bl.request_alignment is good + * enough + */ + uint32_t pwrite_zeroes_alignment; + + /* + * Optimal transfer length in bytes. A power of 2 is best but not + * mandatory. Must be a multiple of bl.request_alignment, or 0 if + * no preferred size + */ + uint32_t opt_transfer; + + /* + * Maximal transfer length in bytes. Need not be power of 2, but + * must be multiple of opt_transfer and bl.request_alignment, or 0 + * for no 32-bit limit. For now, anything larger than INT_MAX is + * clamped down. + */ + uint32_t max_transfer; + + /* + * Maximal hardware transfer length in bytes. Applies whenever + * transfers to the device bypass the kernel I/O scheduler, for + * example with SG_IO. If larger than max_transfer or if zero, + * blk_get_max_hw_transfer will fall back to max_transfer. + */ + uint64_t max_hw_transfer; + + /* + * Maximal number of scatter/gather elements allowed by the hardware. + * Applies whenever transfers to the device bypass the kernel I/O + * scheduler, for example with SG_IO. If larger than max_iov + * or if zero, blk_get_max_hw_iov will fall back to max_iov.
+ */ + int max_hw_iov; + + + /* memory alignment, in bytes so that no bounce buffer is needed */ + size_t min_mem_alignment; + + /* memory alignment, in bytes, for bounce buffer */ + size_t opt_mem_alignment; + + /* maximum number of iovec elements */ + int max_iov; +} BlockLimits; + +typedef struct BdrvOpBlocker BdrvOpBlocker; + +typedef struct BdrvAioNotifier { + void (*attached_aio_context)(AioContext *new_context, void *opaque); + void (*detach_aio_context)(void *opaque); + + void *opaque; + bool deleted; + + QLIST_ENTRY(BdrvAioNotifier) list; +} BdrvAioNotifier; + +struct BdrvChildClass { + /* + * If true, bdrv_replace_node() doesn't change the node this BdrvChild + * points to. + */ + bool stay_at_node; + + /* + * If true, the parent is a BlockDriverState and bdrv_next_all_states() + * will return it. This information is used for drain_all, where every node + * will be drained separately, so the drain only needs to be propagated to + * non-BDS parents. + */ + bool parent_is_bds; + + /* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + void (*inherit_options)(BdrvChildRole role, bool parent_is_format, + int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options); + void (*change_media)(BdrvChild *child, bool load); + + /* + * Returns a malloced string that describes the parent of the child for a + * human reader. This could be a node-name, BlockBackend name, qdev ID or + * QOM path of the device owning the BlockBackend, job type and ID etc. The + * caller is responsible for freeing the memory. + */ + char *(*get_parent_desc)(BdrvChild *child); + + /* + * Notifies the parent that the child has been activated/inactivated (e.g. + * when migration is completing) and it can start/stop requesting + * permissions and doing I/O on it. 
+ */ + void (*activate)(BdrvChild *child, Error **errp); + int (*inactivate)(BdrvChild *child); + + void (*attach)(BdrvChild *child); + void (*detach)(BdrvChild *child); + + /* + * Notifies the parent that the filename of its child has changed (e.g. + * because the direct child was removed from the backing chain), so that it + * can update its reference. + */ + int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, + const char *filename, Error **errp); + + bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, + GSList **ignore, Error **errp); + void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); + + AioContext *(*get_parent_aio_context)(BdrvChild *child); + + /* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + + void (*resize)(BdrvChild *child); + + /* + * Returns a name that is supposedly more useful for human users than the + * node name for identifying the node in question (in particular, a BB + * name), or NULL if the parent can't provide a better name. + */ + const char *(*get_name)(BdrvChild *child); + + /* + * If this pair of functions is implemented, the parent doesn't issue new + * requests after returning from .drained_begin() until .drained_end() is + * called. + * + * These functions must not change the graph (and therefore also must not + * call aio_poll(), which could change the graph indirectly). + * + * If drained_end() schedules background operations, it must atomically + * increment *drained_end_counter for each such operation and atomically + * decrement it once the operation has settled. + * + * Note that this can be nested. If drained_begin() was called twice, new + * I/O is allowed only after drained_end() was called twice, too. 
+ */ + void (*drained_begin)(BdrvChild *child); + void (*drained_end)(BdrvChild *child, int *drained_end_counter); + + /* + * Returns whether the parent has pending requests for the child. This + * callback is polled after .drained_begin() has been called until all + * activity on the child has stopped. + */ + bool (*drained_poll)(BdrvChild *child); +}; + +extern const BdrvChildClass child_of_bds; + +struct BdrvChild { + BlockDriverState *bs; + char *name; + const BdrvChildClass *klass; + BdrvChildRole role; + void *opaque; + + /** + * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask) + */ + uint64_t perm; + + /** + * Permissions that can still be granted to other users of @bs while this + * BdrvChild is still attached to it. (BLK_PERM_* bitmask) + */ + uint64_t shared_perm; + + /* + * This link is frozen: the child can neither be replaced nor + * detached from the parent. + */ + bool frozen; + + /* + * How many times the parent of this child has been drained + * (through klass->drained_*). + * Usually, this is equal to bs->quiesce_counter (potentially + * reduced by bdrv_drain_all_count). It may differ while the + * child is entering or leaving a drained section. + */ + int parent_quiesce_counter; + + QLIST_ENTRY(BdrvChild) next; + QLIST_ENTRY(BdrvChild) next_parent; +}; + +/* + * Allows bdrv_co_block_status() to cache one data region for a + * protocol node. + * + * @valid: Whether the cache is valid (should be accessed with atomic + * functions so this can be reset by RCU readers) + * @data_start: Offset where we know (or strongly assume) is data + * @data_end: Offset where the data region ends (which is not necessarily + * the start of a zeroed region) + */ +typedef struct BdrvBlockStatusCache { + struct rcu_head rcu; + + bool valid; + int64_t data_start; + int64_t data_end; +} BdrvBlockStatusCache; + +struct BlockDriverState { + /* + * Protected by big QEMU lock or read-only after opening. No special + * locking needed during I/O... 
+ */ + int open_flags; /* flags used to open the file, re-used for re-open */ + bool encrypted; /* if true, the media is encrypted */ + bool sg; /* if true, the device is a /dev/sg* */ + bool probed; /* if true, format was probed rather than specified */ + bool force_share; /* if true, always allow all shared permissions */ + bool implicit; /* if true, this filter node was automatically inserted */ + + BlockDriver *drv; /* NULL means no media */ + void *opaque; + + AioContext *aio_context; /* event loop used for fd handlers, timers, etc */ + /* + * long-running tasks intended to always use the same AioContext as this + * BDS may register themselves in this list to be notified of changes + * regarding this BDS's context + */ + QLIST_HEAD(, BdrvAioNotifier) aio_notifiers; + bool walking_aio_notifiers; /* to make removal during iteration safe */ + + char filename[PATH_MAX]; + /* + * If not empty, this image is a diff in relation to backing_file. + * Note that this is the name given in the image header and + * therefore may or may not be equal to .backing->bs->filename. + * If this field contains a relative path, it is to be resolved + * relatively to the overlay's location. + */ + char backing_file[PATH_MAX]; + /* + * The backing filename indicated by the image header. Contrary + * to backing_file, if we ever open this file, auto_backing_file + * is replaced by the resulting BDS's filename (i.e. after a + * bdrv_refresh_filename() run). + */ + char auto_backing_file[PATH_MAX]; + char backing_format[16]; /* if non-zero and backing_file exists */ + + QDict *full_open_options; + char exact_filename[PATH_MAX]; + + BdrvChild *backing; + BdrvChild *file; + + /* I/O Limits */ + BlockLimits bl; + + /* + * Flags honored during pread + */ + unsigned int supported_read_flags; + /* + * Flags honored during pwrite (so far: BDRV_REQ_FUA, + * BDRV_REQ_WRITE_UNCHANGED). 
+ * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those + * writes will be issued as normal writes without the flag set. + * This is important to note for drivers that do not explicitly + * request a WRITE permission for their children and instead take + * the same permissions as their parent did (this is commonly what + * block filters do). Such drivers have to be aware that the + * parent may have taken a WRITE_UNCHANGED permission only and is + * issuing such requests. Drivers either must make sure that + * these requests do not result in plain WRITE accesses (usually + * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding + * every incoming write request as-is, including potentially that + * flag), or they have to explicitly take the WRITE permission for + * their children. + */ + unsigned int supported_write_flags; + /* + * Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, + * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) + */ + unsigned int supported_zero_flags; + /* + * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE). + * + * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure + * that any added space reads as all zeros. If this can't be guaranteed, + * the operation must fail. + */ + unsigned int supported_truncate_flags; + + /* the following member gives a name to every node on the bs graph. */ + char node_name[32]; + /* element of the list of named nodes building the graph */ + QTAILQ_ENTRY(BlockDriverState) node_list; + /* element of the list of all BlockDriverStates (all_bdrv_states) */ + QTAILQ_ENTRY(BlockDriverState) bs_list; + /* element of the list of monitor-owned BDS */ + QTAILQ_ENTRY(BlockDriverState) monitor_list; + int refcnt; + + /* operation blockers. Protected by BQL. */ + QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX]; + + /* + * The node that this node inherited default options from (and a reopen on + * which can affect this node by changing these defaults). 
This is always a + * parent node of this node. + */ + BlockDriverState *inherits_from; + QLIST_HEAD(, BdrvChild) children; + QLIST_HEAD(, BdrvChild) parents; + + QDict *options; + QDict *explicit_options; + BlockdevDetectZeroesOptions detect_zeroes; + + /* The error object in use for blocking operations on backing_hd */ + Error *backing_blocker; + + /* Protected by AioContext lock */ + + /* + * If we are reading a disk image, give its size in sectors. + * Generally read-only; it is written to by load_snapshot and + * save_snapshot, but the block layer is quiescent during those. + */ + int64_t total_sectors; + + /* threshold limit for writes, in bytes. "High water mark". */ + uint64_t write_threshold_offset; + + /* + * Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. + * Reading from the list can be done with either the BQL or the + * dirty_bitmap_mutex. Modifying a bitmap only requires + * dirty_bitmap_mutex. + */ + QemuMutex dirty_bitmap_mutex; + QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; + + /* Offset after the highest byte written to */ + Stat64 wr_highest_offset; + + /* + * If true, copy read backing sectors into image. Can be >1 if more + * than one client has requested copy-on-read. Accessed with atomic + * ops. + */ + int copy_on_read; + + /* + * number of in-flight requests; overall and serialising. + * Accessed with atomic ops. + */ + unsigned int in_flight; + unsigned int serialising_in_flight; + + /* + * counter for nested bdrv_io_plug. + * Accessed with atomic ops. + */ + unsigned io_plugged; + + /* do we need to tell the guest if we have a volatile write cache? */ + int enable_write_cache; + + /* Accessed with atomic ops. */ + int quiesce_counter; + int recursive_quiesce_counter; + + unsigned int write_gen; /* Current data generation */ + + /* Protected by reqs_lock. 
*/ + CoMutex reqs_lock; + QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; + CoQueue flush_queue; /* Serializing flush queue */ + bool active_flush_req; /* Flush request in flight? */ + + /* Only read/written by whoever has set active_flush_req to true. */ + unsigned int flushed_gen; /* Flushed write generation */ + + /* BdrvChild links to this node may never be frozen */ + bool never_freeze; + + /* Lock for block-status cache RCU writers */ + CoMutex bsc_modify_lock; + /* Always non-NULL, but must only be dereferenced under an RCU read guard */ + BdrvBlockStatusCache *block_status_cache; +}; + +struct BlockBackendRootState { + int open_flags; + BlockdevDetectZeroesOptions detect_zeroes; +}; + +typedef enum BlockMirrorBackingMode { + /* + * Reuse the existing backing chain from the source for the target. + * - sync=full: Set backing BDS to NULL. + * - sync=top: Use source's backing BDS. + * - sync=none: Use source as the backing BDS. + */ + MIRROR_SOURCE_BACKING_CHAIN, + + /* Open the target's backing chain completely anew */ + MIRROR_OPEN_BACKING_CHAIN, + + /* Do not change the target's backing BDS after job completion */ + MIRROR_LEAVE_BACKING_CHAIN, +} BlockMirrorBackingMode; + + +/* + * Essential block drivers which must always be statically linked into qemu, and + * which therefore can be accessed without using bdrv_find_format() + */ +extern BlockDriver bdrv_file; +extern BlockDriver bdrv_raw; +extern BlockDriver bdrv_qcow2; + +extern unsigned int bdrv_drain_all_count; +extern QemuOptsList bdrv_create_opts_simple; + +/* + * Common functions that are neither I/O nor Global State. + * + * See include/block/block-common.h for more information about + * the Common API. + */ + +static inline BlockDriverState *child_bs(BdrvChild *child) +{ + return child ? 
child->bs : NULL; +} + +int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp); +int get_tmp_filename(char *filename, int size); +void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, + QDict *options); + + +int bdrv_check_qiov_request(int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + Error **errp); + +#ifdef _WIN32 +int is_windows_drive(const char *filename); +#endif + +#endif /* BLOCK_INT_COMMON_H */ diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h new file mode 100644 index 0000000000..0f21b0570b --- /dev/null +++ b/include/block/block_int-global-state.h @@ -0,0 +1,329 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef BLOCK_INT_GLOBAL_STATE_H +#define BLOCK_INT_GLOBAL_STATE_H + +#include "block_int-common.h" + +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + +/** + * stream_start: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Block device to operate on. + * @base: Block device that will become the new base, or %NULL to + * flatten the whole backing file chain onto @bs. + * @backing_file_str: The file name that will be written to @bs as the + * new backing file if the job completes. Ignored if @base is %NULL. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the stream job inserts into the graph above + * @bs. NULL means that a node name should be autogenerated. + * @errp: Error object. + * + * Start a streaming operation on @bs. Clusters that are unallocated + * in @bs, but allocated in any image between @base and @bs (both + * exclusive) will be written to @bs. At the end of a successful + * streaming job, the backing file of @bs will be changed to + * @backing_file_str in the written image and to @base in the live + * BlockDriverState. + */ +void stream_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, const char *backing_file_str, + BlockDriverState *bottom, + int creation_flags, int64_t speed, + BlockdevOnError on_error, + const char *filter_node_name, + Error **errp); + +/** + * commit_start: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Active block device. + * @top: Top block device to be committed. 
+ * @base: Block device that will be written into, and become the new top. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @backing_file_str: String to use as the backing file in @top's overlay + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @top. NULL means + * that a node name should be autogenerated. + * @errp: Error object. + * + */ +void commit_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, BlockDriverState *top, + int creation_flags, int64_t speed, + BlockdevOnError on_error, const char *backing_file_str, + const char *filter_node_name, Error **errp); +/** + * commit_active_start: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Active block device to be committed. + * @base: Block device that will be written into, and become the new top. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @auto_complete: Auto complete the job. + * @errp: Error object. 
+ * + */ +BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, int creation_flags, + int64_t speed, BlockdevOnError on_error, + const char *filter_node_name, + BlockCompletionFunc *cb, void *opaque, + bool auto_complete, Error **errp); +/* + * mirror_start: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Block device to operate on. + * @target: Block device to write to. + * @replaces: Block graph node name to replace once the mirror is done. Can + * only be used when full mirroring is selected. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @granularity: The chosen granularity for the dirty bitmap. + * @buf_size: The amount of data that can be in flight at one time. + * @mode: Whether to collapse all images in the chain to the target. + * @backing_mode: How to establish the target's backing chain after completion. + * @zero_target: Whether the target should be explicitly zero-initialized + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @unmap: Whether to unmap target where source sectors only contain zeroes. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the mirror job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. + * @copy_mode: When to trigger writes to the target. + * @errp: Error object. + * + * Start a mirroring operation on @bs. Clusters that are allocated + * in @bs will be written to @target until the job is cancelled or + * manually completed. At the end of a successful mirroring job, + * @bs will be switched to read from @target. 
+ */ +void mirror_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, const char *replaces, + int creation_flags, int64_t speed, + uint32_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, + bool zero_target, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + bool unmap, const char *filter_node_name, + MirrorCopyMode copy_mode, Error **errp); + +/* + * backup_job_create: + * @job_id: The id of the newly-created job, or %NULL to use the + * device name of @bs. + * @bs: Block device to operate on. + * @target: Block device to write to. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @sync_mode: What parts of the disk image should be copied to the destination. + * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental' + * @bitmap_mode: The bitmap synchronization policy to use. + * @perf: Performance options. All actual fields assumed to be present, + * all ".has_*" fields are ignored. + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @txn: Transaction that this job is part of (may be NULL). + * + * Create a backup operation on @bs. Clusters in @bs are written to @target + * until the job is cancelled or manually completed. 
+ */ +BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, int64_t speed, + MirrorSyncMode sync_mode, + BdrvDirtyBitmap *sync_bitmap, + BitmapSyncMode bitmap_mode, + bool compress, + const char *filter_node_name, + BackupPerf *perf, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + int creation_flags, + BlockCompletionFunc *cb, void *opaque, + JobTxn *txn, Error **errp); + +BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp); +void bdrv_root_unref_child(BdrvChild *child); + +void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm); + +/** + * Sets a BdrvChild's permissions. Avoid if the parent is a BDS; use + * bdrv_child_refresh_perms() instead and make the parent's + * .bdrv_child_perm() implementation return the correct values. + */ +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); + +/** + * Calls bs->drv->bdrv_child_perm() and updates the child's permission + * masks with the result. + * Drivers should invoke this function whenever an event occurs that + * makes their .bdrv_child_perm() implementation return different + * values than before, but which will not result in the block layer + * automatically refreshing the permissions. + */ +int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp); + +bool bdrv_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace); + +/* + * Default implementation for BlockDriver.bdrv_child_perm() that can + * be used by block filters and image formats, as long as they use the + * child_of_bds child class and set an appropriate BdrvChildRole. 
+ */ +void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); + +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); +bool blk_dev_has_removable_media(BlockBackend *blk); +void blk_dev_eject_request(BlockBackend *blk, bool force); +bool blk_dev_is_medium_locked(BlockBackend *blk); + +void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup); + +void bdrv_set_monitor_owned(BlockDriverState *bs); + +void blockdev_close_all_bdrv_states(void); + +BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp); + +/** + * Simple implementation of bdrv_co_create_opts for protocol drivers + * which only support creation via opening a file + * (usually existing raw storage device) + */ +int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, + const char *filename, + QemuOpts *opts, + Error **errp); + +BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, + const char *name, + BlockDriverState **pbs, + Error **errp); +BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, + BlockDirtyBitmapMergeSourceList *bms, + HBitmap **backup, Error **errp); +BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, + bool release, + BlockDriverState **bitmap_bs, + Error **errp); + + +BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs); + +/** + * bdrv_add_aio_context_notifier: + * + * If a long-running job intends to be always run in the same AioContext as a + * certain BDS, it may use this function to be notified of changes regarding the + * association of the BDS to an AioContext. + * + * attached_aio_context() is called after the target BDS has been attached to a + * new AioContext; detach_aio_context() is called before the target BDS is being + * detached from its old AioContext. 
+ */ +void bdrv_add_aio_context_notifier(BlockDriverState *bs, + void (*attached_aio_context)(AioContext *new_context, void *opaque), + void (*detach_aio_context)(void *opaque), void *opaque); + +/** + * bdrv_remove_aio_context_notifier: + * + * Unsubscribe of change notifications regarding the BDS's AioContext. The + * parameters given here have to be the same as those given to + * bdrv_add_aio_context_notifier(). + */ +void bdrv_remove_aio_context_notifier(BlockDriverState *bs, + void (*aio_context_attached)(AioContext *, + void *), + void (*aio_context_detached)(void *), + void *opaque); + +/** + * End all quiescent sections started by bdrv_drain_all_begin(). This is + * needed when deleting a BDS before bdrv_drain_all_end() is called. + * + * NOTE: this is an internal helper for bdrv_close() *only*. No one else + * should call it. + */ +void bdrv_drain_all_end_quiesce(BlockDriverState *bs); + +/** + * Make sure that the function is running under both drain and BQL. + * The latter protects from concurrent writings + * from the GS API, while the former prevents concurrent reads + * from I/O. + */ +static inline void assert_bdrv_graph_writable(BlockDriverState *bs) +{ + /* + * TODO: this function is incomplete. Because the users of this + * assert lack the necessary drains, check only for BQL. 
+ * Once the necessary drains are added, + * assert also for qatomic_read(&bs->quiesce_counter) > 0 + */ + assert(qemu_in_main_thread()); +} + +#endif /* BLOCK_INT_GLOBAL_STATE_H */ diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h new file mode 100644 index 0000000000..bb454200e5 --- /dev/null +++ b/include/block/block_int-io.h @@ -0,0 +1,194 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_INT_IO_H +#define BLOCK_INT_IO_H + +#include "block_int-common.h" + +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. 
+ */ + +int coroutine_fn bdrv_co_preadv_snapshot(BdrvChild *child, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset); +int coroutine_fn bdrv_co_snapshot_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); +int coroutine_fn bdrv_co_pdiscard_snapshot(BlockDriverState *bs, + int64_t offset, int64_t bytes); + + +int coroutine_fn bdrv_co_preadv(BdrvChild *child, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); +int coroutine_fn bdrv_co_pwritev(BdrvChild *child, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); + +static inline int coroutine_fn bdrv_co_pread(BdrvChild *child, + int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_CODE(); + + return bdrv_co_preadv(child, offset, bytes, &qiov, flags); +} + +static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child, + int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_CODE(); + + return bdrv_co_pwritev(child, offset, bytes, &qiov, flags); +} + +bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req, + uint64_t align); +BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs); + +BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, + const char *filename); + +/** + * bdrv_wakeup: + * @bs: The BlockDriverState for which an I/O operation has been completed. + * + * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. 
During + * synchronous I/O on a BlockDriverState that is attached to another + * I/O thread, the main thread lets the I/O thread's event loop run, + * waiting for the I/O operation to complete. A bdrv_wakeup will wake + * up the main thread if necessary. + * + * Manual calls to bdrv_wakeup are rarely necessary, because + * bdrv_dec_in_flight already calls it. + */ +void bdrv_wakeup(BlockDriverState *bs); + +const char *bdrv_get_parent_name(const BlockDriverState *bs); +bool blk_dev_has_tray(BlockBackend *blk); +bool blk_dev_is_tray_open(BlockBackend *blk); + +void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); + +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); +bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest, + const BdrvDirtyBitmap *src, + HBitmap **backup, bool lock); + +void bdrv_inc_in_flight(BlockDriverState *bs); +void bdrv_dec_in_flight(BlockDriverState *bs); + +int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset, + BdrvChild *dst, int64_t dst_offset, + int64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); +int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset, + BdrvChild *dst, int64_t dst_offset, + int64_t bytes, + BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + +int refresh_total_sectors(BlockDriverState *bs, int64_t hint); + +BdrvChild *bdrv_cow_child(BlockDriverState *bs); +BdrvChild *bdrv_filter_child(BlockDriverState *bs); +BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs); +BdrvChild *bdrv_primary_child(BlockDriverState *bs); +BlockDriverState *bdrv_skip_filters(BlockDriverState *bs); +BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs); + +static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs) +{ + IO_CODE(); + return child_bs(bdrv_cow_child(bs)); +} + +static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs) +{ + IO_CODE(); + return child_bs(bdrv_filter_child(bs)); +} 
+ +static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs) +{ + IO_CODE(); + return child_bs(bdrv_filter_or_cow_child(bs)); +} + +static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs) +{ + IO_CODE(); + return child_bs(bdrv_primary_child(bs)); +} + +/** + * Check whether the given offset is in the cached block-status data + * region. + * + * If it is, and @pnum is not NULL, *pnum is set to + * `bsc.data_end - offset`, i.e. how many bytes, starting from + * @offset, are data (according to the cache). + * Otherwise, *pnum is not touched. + */ +bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum); + +/** + * If [offset, offset + bytes) overlaps with the currently cached + * block-status region, invalidate the cache. + * + * (To be used by I/O paths that cause data regions to be zero or + * holes.) + */ +void bdrv_bsc_invalidate_range(BlockDriverState *bs, + int64_t offset, int64_t bytes); + +/** + * Mark the range [offset, offset + bytes) as a data region. + */ +void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); + + +/* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. + * + * See include/block/block-io.h for more information about + * the "I/O or GS" API. 
+ */ + +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); + +#endif /* BLOCK_INT_IO_H */ diff --git a/include/block/block_int.h b/include/block/block_int.h index 27008cfb22..7d50b6bbd1 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -24,1478 +24,9 @@ #ifndef BLOCK_INT_H #define BLOCK_INT_H -#include "block/accounting.h" -#include "block/block.h" -#include "block/aio-wait.h" -#include "qemu/queue.h" -#include "qemu/coroutine.h" -#include "qemu/stats64.h" -#include "qemu/timer.h" -#include "qemu/hbitmap.h" -#include "block/snapshot.h" -#include "qemu/throttle.h" -#include "qemu/rcu.h" +#include "block_int-global-state.h" +#include "block_int-io.h" -#define BLOCK_FLAG_LAZY_REFCOUNTS 8 - -#define BLOCK_OPT_SIZE "size" -#define BLOCK_OPT_ENCRYPT "encryption" -#define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format" -#define BLOCK_OPT_COMPAT6 "compat6" -#define BLOCK_OPT_HWVERSION "hwversion" -#define BLOCK_OPT_BACKING_FILE "backing_file" -#define BLOCK_OPT_BACKING_FMT "backing_fmt" -#define BLOCK_OPT_CLUSTER_SIZE "cluster_size" -#define BLOCK_OPT_TABLE_SIZE "table_size" -#define BLOCK_OPT_PREALLOC "preallocation" -#define BLOCK_OPT_SUBFMT "subformat" -#define BLOCK_OPT_COMPAT_LEVEL "compat" -#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" -#define BLOCK_OPT_ADAPTER_TYPE "adapter_type" -#define BLOCK_OPT_REDUNDANCY "redundancy" -#define BLOCK_OPT_NOCOW "nocow" -#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint" -#define BLOCK_OPT_OBJECT_SIZE "object_size" -#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits" -#define BLOCK_OPT_DATA_FILE "data_file" -#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw" -#define BLOCK_OPT_COMPRESSION_TYPE "compression_type" -#define BLOCK_OPT_EXTL2 "extended_l2" - -#define BLOCK_PROBE_BUF_SIZE 512 - -enum BdrvTrackedRequestType { - BDRV_TRACKED_READ, - BDRV_TRACKED_WRITE, - BDRV_TRACKED_DISCARD, - 
BDRV_TRACKED_TRUNCATE, -}; - -/* - * That is not quite good that BdrvTrackedRequest structure is public, - * as block/io.c is very careful about incoming offset/bytes being - * correct. Be sure to assert bdrv_check_request() succeeded after any - * modification of BdrvTrackedRequest object out of block/io.c - */ -typedef struct BdrvTrackedRequest { - BlockDriverState *bs; - int64_t offset; - int64_t bytes; - enum BdrvTrackedRequestType type; - - bool serialising; - int64_t overlap_offset; - int64_t overlap_bytes; - - QLIST_ENTRY(BdrvTrackedRequest) list; - Coroutine *co; /* owner, used for deadlock detection */ - CoQueue wait_queue; /* coroutines blocked on this request */ - - struct BdrvTrackedRequest *waiting_for; -} BdrvTrackedRequest; - -int bdrv_check_qiov_request(int64_t offset, int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, - Error **errp); -int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp); - -struct BlockDriver { - const char *format_name; - int instance_size; - - /* set to true if the BlockDriver is a block filter. Block filters pass - * certain callbacks that refer to data (see block.c) to their bs->file - * or bs->backing (whichever one exists) if the driver doesn't implement - * them. Drivers that do not wish to forward must implement them and return - * -ENOTSUP. - * Note that filters are not allowed to modify data. - * - * Filters generally cannot have more than a single filtered child, - * because the data they present must at all times be the same as - * that on their filtered child. That would be impossible to - * achieve for multiple filtered children. - * (And this filtered child must then be bs->file or bs->backing.) - */ - bool is_filter; - /* - * Set to true if the BlockDriver is a format driver. Format nodes - * generally do not expect their children to be other format nodes - * (except for backing files), and so format probing is disabled - * on those children. 
- */ - bool is_format; - /* - * Return true if @to_replace can be replaced by a BDS with the - * same data as @bs without it affecting @bs's behavior (that is, - * without it being visible to @bs's parents). - */ - bool (*bdrv_recurse_can_replace)(BlockDriverState *bs, - BlockDriverState *to_replace); - - int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); - int (*bdrv_probe_device)(const char *filename); - - /* Any driver implementing this callback is expected to be able to handle - * NULL file names in its .bdrv_open() implementation */ - void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp); - /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have - * this field set to true, except ones that are defined only by their - * child's bs. - * An example of the last type will be the quorum block driver. - */ - bool bdrv_needs_filename; - - /* - * Set if a driver can support backing files. This also implies the - * following semantics: - * - * - Return status 0 of .bdrv_co_block_status means that corresponding - * blocks are not allocated in this layer of backing-chain - * - For such (unallocated) blocks, read will: - * - fill buffer with zeros if there is no backing file - * - read from the backing file otherwise, where the block layer - * takes care of reading zeros beyond EOF if backing file is short - */ - bool supports_backing; - - /* For handling image reopen for split or non-split files */ - int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, Error **errp); - void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); - void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); - void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); - void (*bdrv_join_options)(QDict *options, QDict *old_options); - - int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags, - Error **errp); - - /* Protocol drivers should implement this instead of bdrv_open */ - int 
(*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, - Error **errp); - void (*bdrv_close)(BlockDriverState *bs); - - - int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, - Error **errp); - int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv, - const char *filename, - QemuOpts *opts, - Error **errp); - - int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs, - BlockdevAmendOptions *opts, - bool force, - Error **errp); - - int (*bdrv_amend_options)(BlockDriverState *bs, - QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb, - void *cb_opaque, - bool force, - Error **errp); - - int (*bdrv_make_empty)(BlockDriverState *bs); - - /* - * Refreshes the bs->exact_filename field. If that is impossible, - * bs->exact_filename has to be left empty. - */ - void (*bdrv_refresh_filename)(BlockDriverState *bs); - - /* - * Gathers the open options for all children into @target. - * A simple format driver (without backing file support) might - * implement this function like this: - * - * QINCREF(bs->file->bs->full_open_options); - * qdict_put(target, "file", bs->file->bs->full_open_options); - * - * If not specified, the generic implementation will simply put - * all children's options under their respective name. - * - * @backing_overridden is true when bs->backing seems not to be - * the child that would result from opening bs->backing_file. - * Therefore, if it is true, the backing child's options should be - * gathered; otherwise, there is no need since the backing child - * is the one implied by the image header. - * - * Note that ideally this function would not be needed. Every - * block driver which implements it is probably doing something - * shady regarding its runtime option structure. 
- */ - void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target, - bool backing_overridden); - - /* - * Returns an allocated string which is the directory name of this BDS: It - * will be used to make relative filenames absolute by prepending this - * function's return value to them. - */ - char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp); - - /* aio */ - BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); - BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); - BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, - BlockCompletionFunc *cb, void *opaque); - BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs, - int64_t offset, int bytes, - BlockCompletionFunc *cb, void *opaque); - - int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); - - /** - * @offset: position in bytes to read at - * @bytes: number of bytes to read - * @qiov: the buffers to fill with read data - * @flags: currently unused, always 0 - * - * @offset and @bytes will be a multiple of 'request_alignment', - * but the length of individual @qiov elements does not have to - * be a multiple. - * - * @bytes will always equal the total size of @qiov, and will be - * no larger than 'max_transfer'. - * - * The buffer in @qiov may point directly to guest memory. 
- */ - int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); - int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs, - int64_t offset, int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); - int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags); - /** - * @offset: position in bytes to write at - * @bytes: number of bytes to write - * @qiov: the buffers containing data to write - * @flags: zero or more bits allowed by 'supported_write_flags' - * - * @offset and @bytes will be a multiple of 'request_alignment', - * but the length of individual @qiov elements does not have to - * be a multiple. - * - * @bytes will always equal the total size of @qiov, and will be - * no larger than 'max_transfer'. - * - * The buffer in @qiov may point directly to guest memory. - */ - int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); - int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, - BdrvRequestFlags flags); - - /* - * Efficiently zero a region of the disk image. Typically an image format - * would use a compact metadata representation to implement this. This - * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev() - * will be called instead. - */ - int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs, - int64_t offset, int64_t bytes, BdrvRequestFlags flags); - int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, - int64_t offset, int64_t bytes); - - /* Map [offset, offset + nbytes) range onto a child of @bs to copy from, - * and invoke bdrv_co_copy_range_from(child, ...), or invoke - * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from. 
- * - * See the comment of bdrv_co_copy_range for the parameter and return value - * semantics. - */ - int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs, - BdrvChild *src, - int64_t offset, - BdrvChild *dst, - int64_t dst_offset, - int64_t bytes, - BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); - - /* Map [offset, offset + nbytes) range onto a child of bs to copy data to, - * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy - * operation if @bs is the leaf and @src has the same BlockDriver. Return - * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver. - * - * See the comment of bdrv_co_copy_range for the parameter and return value - * semantics. - */ - int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs, - BdrvChild *src, - int64_t src_offset, - BdrvChild *dst, - int64_t dst_offset, - int64_t bytes, - BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); - - /* - * Building block for bdrv_block_status[_above] and - * bdrv_is_allocated[_above]. The driver should answer only - * according to the current layer, and should only need to set - * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, - * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing - * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See - * block.h for the overall meaning of the bits. As a hint, the - * flag want_zero is true if the caller cares more about precise - * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for - * overall allocation (favor larger *pnum, perhaps by reporting - * _DATA instead of _ZERO). The block layer guarantees input - * clamped to bdrv_getlength() and aligned to request_alignment, - * as well as non-NULL pnum, map, and file; in turn, the driver - * must return an error or set pnum to an aligned non-zero value. - * - * Note that @bytes is just a hint on how big of a region the - * caller wants to inspect. It is not a limit on *pnum. 
- * Implementations are free to return larger values of *pnum if - * doing so does not incur a performance penalty. - * - * block/io.c's bdrv_co_block_status() will utilize an unclamped - * *pnum value for the block-status cache on protocol nodes, prior - * to clamping *pnum for return to its caller. - */ - int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, - bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, - int64_t *map, BlockDriverState **file); - - /* - * This informs the driver that we are no longer interested in the result - * of in-flight requests, so don't waste the time if possible. - * - * One example usage is to avoid waiting for an nbd target node reconnect - * timeout during job-cancel with force=true. - */ - void (*bdrv_cancel_in_flight)(BlockDriverState *bs); - - /* - * Invalidate any cached meta-data. - */ - void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs, - Error **errp); - int (*bdrv_inactivate)(BlockDriverState *bs); - - /* - * Flushes all data for all layers by calling bdrv_co_flush for underlying - * layers, if needed. This function is needed for deterministic - * synchronization of the flush finishing callback. - */ - int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); - - /* Delete a created file. */ - int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs, - Error **errp); - - /* - * Flushes all data that was already written to the OS all the way down to - * the disk (for example file-posix.c calls fsync()). - */ - int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs); - - /* - * Flushes all internal caches to the OS. The data may still sit in a - * writeback cache of the host OS, but it will survive a crash of the qemu - * process. - */ - int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs); - - /* - * Drivers setting this field must be able to work with just a plain - * filename with '<protocol_name>:' as a prefix, and no other options. 
- * Options may be extracted from the filename by implementing - * bdrv_parse_filename. - */ - const char *protocol_name; - - /* - * Truncate @bs to @offset bytes using the given @prealloc mode - * when growing. Modes other than PREALLOC_MODE_OFF should be - * rejected when shrinking @bs. - * - * If @exact is true, @bs must be resized to exactly @offset. - * Otherwise, it is sufficient for @bs (if it is a host block - * device and thus there is no way to resize it) to be at least - * @offset bytes in length. - * - * If @exact is true and this function fails but would succeed - * with @exact = false, it should return -ENOTSUP. - */ - int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, - bool exact, PreallocMode prealloc, - BdrvRequestFlags flags, Error **errp); - - int64_t (*bdrv_getlength)(BlockDriverState *bs); - bool has_variable_length; - int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs); - BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs, - Error **errp); - - int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov); - int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset); - - int (*bdrv_snapshot_create)(BlockDriverState *bs, - QEMUSnapshotInfo *sn_info); - int (*bdrv_snapshot_goto)(BlockDriverState *bs, - const char *snapshot_id); - int (*bdrv_snapshot_delete)(BlockDriverState *bs, - const char *snapshot_id, - const char *name, - Error **errp); - int (*bdrv_snapshot_list)(BlockDriverState *bs, - QEMUSnapshotInfo **psn_info); - int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, - const char *snapshot_id, - const char *name, - Error **errp); - int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); - ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs, - Error **errp); - BlockStatsSpecific 
*(*bdrv_get_specific_stats)(BlockDriverState *bs); - - int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, - QEMUIOVector *qiov, - int64_t pos); - int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, - QEMUIOVector *qiov, - int64_t pos); - - int (*bdrv_change_backing_file)(BlockDriverState *bs, - const char *backing_file, const char *backing_fmt); - - /* removable device specific */ - bool (*bdrv_is_inserted)(BlockDriverState *bs); - void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); - void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); - - /* to control generic scsi devices */ - BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, - unsigned long int req, void *buf, - BlockCompletionFunc *cb, void *opaque); - int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs, - unsigned long int req, void *buf); - - /* List of options for creating images, terminated by name == NULL */ - QemuOptsList *create_opts; - - /* List of options for image amend */ - QemuOptsList *amend_opts; - - /* - * If this driver supports reopening images this contains a - * NULL-terminated list of the runtime options that can be - * modified. If an option in this list is unspecified during - * reopen then it _must_ be reset to its default value or return - * an error. - */ - const char *const *mutable_opts; - - /* - * Returns 0 for completed check, -errno for internal errors. - * The check results are stored in result. - */ - int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs, - BdrvCheckResult *result, - BdrvCheckMode fix); - - void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event); - - /* TODO Better pass a option string/QDict/QemuOpts to add any rule? 
*/ - int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, - const char *tag); - int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs, - const char *tag); - int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); - bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); - - void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp); - - /* - * Returns 1 if newly created images are guaranteed to contain only - * zeros, 0 otherwise. - */ - int (*bdrv_has_zero_init)(BlockDriverState *bs); - - /* Remove fd handlers, timers, and other event loop callbacks so the event - * loop is no longer in use. Called with no in-flight requests and in - * depth-first traversal order with parents before child nodes. - */ - void (*bdrv_detach_aio_context)(BlockDriverState *bs); - - /* Add fd handlers, timers, and other event loop callbacks so I/O requests - * can be processed again. Called with no in-flight requests and in - * depth-first traversal order with child nodes before parent nodes. - */ - void (*bdrv_attach_aio_context)(BlockDriverState *bs, - AioContext *new_context); - - /* io queue for linux-aio */ - void (*bdrv_io_plug)(BlockDriverState *bs); - void (*bdrv_io_unplug)(BlockDriverState *bs); - - /** - * Try to get @bs's logical and physical block size. - * On success, store them in @bsz and return zero. - * On failure, return negative errno. - */ - int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz); - /** - * Try to get @bs's geometry (cyls, heads, sectors) - * On success, store them in @geo and return 0. - * On failure return -errno. - * Only drivers that want to override guest geometry implement this - * callback; see hd_geometry_guess(). - */ - int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); - - /** - * bdrv_co_drain_begin is called if implemented in the beginning of a - * drain operation to drain and stop any internal sources of requests in - * the driver. 
- * bdrv_co_drain_end is called if implemented at the end of the drain. - * - * They should be used by the driver to e.g. manage scheduled I/O - * requests, or toggle an internal state. After the end of the drain new - * requests will continue normally. - */ - void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); - void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); - - void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child, - Error **errp); - void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, - Error **errp); - - /** - * Informs the block driver that a permission change is intended. The - * driver checks whether the change is permissible and may take other - * preparations for the change (e.g. get file system locks). This operation - * is always followed either by a call to either .bdrv_set_perm or - * .bdrv_abort_perm_update. - * - * Checks whether the requested set of cumulative permissions in @perm - * can be granted for accessing @bs and whether no other users are using - * permissions other than those given in @shared (both arguments take - * BLK_PERM_* bitmasks). - * - * If both conditions are met, 0 is returned. Otherwise, -errno is returned - * and errp is set to an error describing the conflict. - */ - int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, - uint64_t shared, Error **errp); - - /** - * Called to inform the driver that the set of cumulative set of used - * permissions for @bs has changed to @perm, and the set of sharable - * permission to @shared. The driver can use this to propagate changes to - * its children (i.e. request permissions only if a parent actually needs - * them). - * - * This function is only invoked after bdrv_check_perm(), so block drivers - * may rely on preparations made in their .bdrv_check_perm implementation. 
- */ - void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared); - - /* - * Called to inform the driver that after a previous bdrv_check_perm() - * call, the permission update is not performed and any preparations made - * for it (e.g. taken file locks) need to be undone. - * - * This function can be called even for nodes that never saw a - * bdrv_check_perm() call. It is a no-op then. - */ - void (*bdrv_abort_perm_update)(BlockDriverState *bs); - - /** - * Returns in @nperm and @nshared the permissions that the driver for @bs - * needs on its child @c, based on the cumulative permissions requested by - * the parents in @parent_perm and @parent_shared. - * - * If @c is NULL, return the permissions for attaching a new child for the - * given @child_class and @role. - * - * If @reopen_queue is non-NULL, don't return the currently needed - * permissions, but those that will be needed after applying the - * @reopen_queue. - */ - void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, - BdrvChildRole role, - BlockReopenQueue *reopen_queue, - uint64_t parent_perm, uint64_t parent_shared, - uint64_t *nperm, uint64_t *nshared); - - bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); - bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs, - const char *name, - uint32_t granularity, - Error **errp); - int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs, - const char *name, - Error **errp); - - /** - * Register/unregister a buffer for I/O. For example, when the driver is - * interested to know the memory areas that will later be used in iovs, so - * that it can do IOMMU mapping with VFIO etc., in order to get better - * performance. In the case of VFIO drivers, this callback is used to do - * DMA mapping for hot buffers. 
- */ - void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size); - void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host); - QLIST_ENTRY(BlockDriver) list; - - /* Pointer to a NULL-terminated array of names of strong options - * that can be specified for bdrv_open(). A strong option is one - * that changes the data of a BDS. - * If this pointer is NULL, the array is considered empty. - * "filename" and "driver" are always considered strong. */ - const char *const *strong_runtime_opts; -}; - -static inline bool block_driver_can_compress(BlockDriver *drv) -{ - return drv->bdrv_co_pwritev_compressed || - drv->bdrv_co_pwritev_compressed_part; -} - -typedef struct BlockLimits { - /* Alignment requirement, in bytes, for offset/length of I/O - * requests. Must be a power of 2 less than INT_MAX; defaults to - * 1 for drivers with modern byte interfaces, and to 512 - * otherwise. */ - uint32_t request_alignment; - - /* - * Maximum number of bytes that can be discarded at once. Must be multiple - * of pdiscard_alignment, but need not be power of 2. May be 0 if no - * inherent 64-bit limit. - */ - int64_t max_pdiscard; - - /* Optimal alignment for discard requests in bytes. A power of 2 - * is best but not mandatory. Must be a multiple of - * bl.request_alignment, and must be less than max_pdiscard if - * that is set. May be 0 if bl.request_alignment is good enough */ - uint32_t pdiscard_alignment; - - /* - * Maximum number of bytes that can zeroized at once. Must be multiple of - * pwrite_zeroes_alignment. 0 means no limit. - */ - int64_t max_pwrite_zeroes; - - /* Optimal alignment for write zeroes requests in bytes. A power - * of 2 is best but not mandatory. Must be a multiple of - * bl.request_alignment, and must be less than max_pwrite_zeroes - * if that is set. May be 0 if bl.request_alignment is good - * enough */ - uint32_t pwrite_zeroes_alignment; - - /* Optimal transfer length in bytes. A power of 2 is best but not - * mandatory. 
Must be a multiple of bl.request_alignment, or 0 if - * no preferred size */ - uint32_t opt_transfer; - - /* Maximal transfer length in bytes. Need not be power of 2, but - * must be multiple of opt_transfer and bl.request_alignment, or 0 - * for no 32-bit limit. For now, anything larger than INT_MAX is - * clamped down. */ - uint32_t max_transfer; - - /* Maximal hardware transfer length in bytes. Applies whenever - * transfers to the device bypass the kernel I/O scheduler, for - * example with SG_IO. If larger than max_transfer or if zero, - * blk_get_max_hw_transfer will fall back to max_transfer. - */ - uint64_t max_hw_transfer; - - /* Maximal number of scatter/gather elements allowed by the hardware. - * Applies whenever transfers to the device bypass the kernel I/O - * scheduler, for example with SG_IO. If larger than max_iov - * or if zero, blk_get_max_hw_iov will fall back to max_iov. - */ - int max_hw_iov; - - /* memory alignment, in bytes so that no bounce buffer is needed */ - size_t min_mem_alignment; - - /* memory alignment, in bytes, for bounce buffer */ - size_t opt_mem_alignment; - - /* maximum number of iovec elements */ - int max_iov; -} BlockLimits; - -typedef struct BdrvOpBlocker BdrvOpBlocker; - -typedef struct BdrvAioNotifier { - void (*attached_aio_context)(AioContext *new_context, void *opaque); - void (*detach_aio_context)(void *opaque); - - void *opaque; - bool deleted; - - QLIST_ENTRY(BdrvAioNotifier) list; -} BdrvAioNotifier; - -struct BdrvChildClass { - /* If true, bdrv_replace_node() doesn't change the node this BdrvChild - * points to. */ - bool stay_at_node; - - /* If true, the parent is a BlockDriverState and bdrv_next_all_states() - * will return it. This information is used for drain_all, where every node - * will be drained separately, so the drain only needs to be propagated to - * non-BDS parents. 
*/ - bool parent_is_bds; - - void (*inherit_options)(BdrvChildRole role, bool parent_is_format, - int *child_flags, QDict *child_options, - int parent_flags, QDict *parent_options); - - void (*change_media)(BdrvChild *child, bool load); - void (*resize)(BdrvChild *child); - - /* Returns a name that is supposedly more useful for human users than the - * node name for identifying the node in question (in particular, a BB - * name), or NULL if the parent can't provide a better name. */ - const char *(*get_name)(BdrvChild *child); - - /* Returns a malloced string that describes the parent of the child for a - * human reader. This could be a node-name, BlockBackend name, qdev ID or - * QOM path of the device owning the BlockBackend, job type and ID etc. The - * caller is responsible for freeing the memory. */ - char *(*get_parent_desc)(BdrvChild *child); - - /* - * If this pair of functions is implemented, the parent doesn't issue new - * requests after returning from .drained_begin() until .drained_end() is - * called. - * - * These functions must not change the graph (and therefore also must not - * call aio_poll(), which could change the graph indirectly). - * - * If drained_end() schedules background operations, it must atomically - * increment *drained_end_counter for each such operation and atomically - * decrement it once the operation has settled. - * - * Note that this can be nested. If drained_begin() was called twice, new - * I/O is allowed only after drained_end() was called twice, too. - */ - void (*drained_begin)(BdrvChild *child); - void (*drained_end)(BdrvChild *child, int *drained_end_counter); - - /* - * Returns whether the parent has pending requests for the child. This - * callback is polled after .drained_begin() has been called until all - * activity on the child has stopped. - */ - bool (*drained_poll)(BdrvChild *child); - - /* Notifies the parent that the child has been activated/inactivated (e.g. 
- * when migration is completing) and it can start/stop requesting - * permissions and doing I/O on it. */ - void (*activate)(BdrvChild *child, Error **errp); - int (*inactivate)(BdrvChild *child); - - void (*attach)(BdrvChild *child); - void (*detach)(BdrvChild *child); - - /* Notifies the parent that the filename of its child has changed (e.g. - * because the direct child was removed from the backing chain), so that it - * can update its reference. */ - int (*update_filename)(BdrvChild *child, BlockDriverState *new_base, - const char *filename, Error **errp); - - bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, - GSList **ignore, Error **errp); - void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); - - AioContext *(*get_parent_aio_context)(BdrvChild *child); -}; - -extern const BdrvChildClass child_of_bds; - -struct BdrvChild { - BlockDriverState *bs; - char *name; - const BdrvChildClass *klass; - BdrvChildRole role; - void *opaque; - - /** - * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask) - */ - uint64_t perm; - - /** - * Permissions that can still be granted to other users of @bs while this - * BdrvChild is still attached to it. (BLK_PERM_* bitmask) - */ - uint64_t shared_perm; - - /* - * This link is frozen: the child can neither be replaced nor - * detached from the parent. - */ - bool frozen; - - /* - * How many times the parent of this child has been drained - * (through klass->drained_*). - * Usually, this is equal to bs->quiesce_counter (potentially - * reduced by bdrv_drain_all_count). It may differ while the - * child is entering or leaving a drained section. - */ - int parent_quiesce_counter; - - QLIST_ENTRY(BdrvChild) next; - QLIST_ENTRY(BdrvChild) next_parent; -}; - -/* - * Allows bdrv_co_block_status() to cache one data region for a - * protocol node. 
- * - * @valid: Whether the cache is valid (should be accessed with atomic - * functions so this can be reset by RCU readers) - * @data_start: Offset where we know (or strongly assume) is data - * @data_end: Offset where the data region ends (which is not necessarily - * the start of a zeroed region) - */ -typedef struct BdrvBlockStatusCache { - struct rcu_head rcu; - - bool valid; - int64_t data_start; - int64_t data_end; -} BdrvBlockStatusCache; - -struct BlockDriverState { - /* Protected by big QEMU lock or read-only after opening. No special - * locking needed during I/O... - */ - int open_flags; /* flags used to open the file, re-used for re-open */ - bool encrypted; /* if true, the media is encrypted */ - bool sg; /* if true, the device is a /dev/sg* */ - bool probed; /* if true, format was probed rather than specified */ - bool force_share; /* if true, always allow all shared permissions */ - bool implicit; /* if true, this filter node was automatically inserted */ - - BlockDriver *drv; /* NULL means no media */ - void *opaque; - - AioContext *aio_context; /* event loop used for fd handlers, timers, etc */ - /* long-running tasks intended to always use the same AioContext as this - * BDS may register themselves in this list to be notified of changes - * regarding this BDS's context */ - QLIST_HEAD(, BdrvAioNotifier) aio_notifiers; - bool walking_aio_notifiers; /* to make removal during iteration safe */ - - char filename[PATH_MAX]; - /* - * If not empty, this image is a diff in relation to backing_file. - * Note that this is the name given in the image header and - * therefore may or may not be equal to .backing->bs->filename. - * If this field contains a relative path, it is to be resolved - * relatively to the overlay's location. - */ - char backing_file[PATH_MAX]; - /* - * The backing filename indicated by the image header. Contrary - * to backing_file, if we ever open this file, auto_backing_file - * is replaced by the resulting BDS's filename (i.e. 
after a - * bdrv_refresh_filename() run). - */ - char auto_backing_file[PATH_MAX]; - char backing_format[16]; /* if non-zero and backing_file exists */ - - QDict *full_open_options; - char exact_filename[PATH_MAX]; - - BdrvChild *backing; - BdrvChild *file; - - /* I/O Limits */ - BlockLimits bl; - - /* - * Flags honored during pread - */ - unsigned int supported_read_flags; - /* Flags honored during pwrite (so far: BDRV_REQ_FUA, - * BDRV_REQ_WRITE_UNCHANGED). - * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those - * writes will be issued as normal writes without the flag set. - * This is important to note for drivers that do not explicitly - * request a WRITE permission for their children and instead take - * the same permissions as their parent did (this is commonly what - * block filters do). Such drivers have to be aware that the - * parent may have taken a WRITE_UNCHANGED permission only and is - * issuing such requests. Drivers either must make sure that - * these requests do not result in plain WRITE accesses (usually - * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding - * every incoming write request as-is, including potentially that - * flag), or they have to explicitly take the WRITE permission for - * their children. */ - unsigned int supported_write_flags; - /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, - * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */ - unsigned int supported_zero_flags; - /* - * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE). - * - * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure - * that any added space reads as all zeros. If this can't be guaranteed, - * the operation must fail. - */ - unsigned int supported_truncate_flags; - - /* the following member gives a name to every node on the bs graph. 
*/ - char node_name[32]; - /* element of the list of named nodes building the graph */ - QTAILQ_ENTRY(BlockDriverState) node_list; - /* element of the list of all BlockDriverStates (all_bdrv_states) */ - QTAILQ_ENTRY(BlockDriverState) bs_list; - /* element of the list of monitor-owned BDS */ - QTAILQ_ENTRY(BlockDriverState) monitor_list; - int refcnt; - - /* operation blockers */ - QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX]; - - /* The node that this node inherited default options from (and a reopen on - * which can affect this node by changing these defaults). This is always a - * parent node of this node. */ - BlockDriverState *inherits_from; - QLIST_HEAD(, BdrvChild) children; - QLIST_HEAD(, BdrvChild) parents; - - QDict *options; - QDict *explicit_options; - BlockdevDetectZeroesOptions detect_zeroes; - - /* The error object in use for blocking operations on backing_hd */ - Error *backing_blocker; - - /* Protected by AioContext lock */ - - /* If we are reading a disk image, give its size in sectors. - * Generally read-only; it is written to by load_snapshot and - * save_snaphost, but the block layer is quiescent during those. - */ - int64_t total_sectors; - - /* threshold limit for writes, in bytes. "High water mark". */ - uint64_t write_threshold_offset; - - /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. - * Reading from the list can be done with either the BQL or the - * dirty_bitmap_mutex. Modifying a bitmap only requires - * dirty_bitmap_mutex. */ - QemuMutex dirty_bitmap_mutex; - QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; - - /* Offset after the highest byte written to */ - Stat64 wr_highest_offset; - - /* If true, copy read backing sectors into image. Can be >1 if more - * than one client has requested copy-on-read. Accessed with atomic - * ops. - */ - int copy_on_read; - - /* number of in-flight requests; overall and serialising. - * Accessed with atomic ops. 
- */ - unsigned int in_flight; - unsigned int serialising_in_flight; - - /* counter for nested bdrv_io_plug. - * Accessed with atomic ops. - */ - unsigned io_plugged; - - /* do we need to tell the quest if we have a volatile write cache? */ - int enable_write_cache; - - /* Accessed with atomic ops. */ - int quiesce_counter; - int recursive_quiesce_counter; - - unsigned int write_gen; /* Current data generation */ - - /* Protected by reqs_lock. */ - CoMutex reqs_lock; - QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; - CoQueue flush_queue; /* Serializing flush queue */ - bool active_flush_req; /* Flush request in flight? */ - - /* Only read/written by whoever has set active_flush_req to true. */ - unsigned int flushed_gen; /* Flushed write generation */ - - /* BdrvChild links to this node may never be frozen */ - bool never_freeze; - - /* Lock for block-status cache RCU writers */ - CoMutex bsc_modify_lock; - /* Always non-NULL, but must only be dereferenced under an RCU read guard */ - BdrvBlockStatusCache *block_status_cache; -}; - -struct BlockBackendRootState { - int open_flags; - BlockdevDetectZeroesOptions detect_zeroes; -}; - -typedef enum BlockMirrorBackingMode { - /* Reuse the existing backing chain from the source for the target. - * - sync=full: Set backing BDS to NULL. - * - sync=top: Use source's backing BDS. - * - sync=none: Use source as the backing BDS. 
*/ - MIRROR_SOURCE_BACKING_CHAIN, - - /* Open the target's backing chain completely anew */ - MIRROR_OPEN_BACKING_CHAIN, - - /* Do not change the target's backing BDS after job completion */ - MIRROR_LEAVE_BACKING_CHAIN, -} BlockMirrorBackingMode; - - -/* Essential block drivers which must always be statically linked into qemu, and - * which therefore can be accessed without using bdrv_find_format() */ -extern BlockDriver bdrv_file; -extern BlockDriver bdrv_raw; -extern BlockDriver bdrv_qcow2; - -int coroutine_fn bdrv_co_preadv(BdrvChild *child, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); -int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, - int64_t offset, int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); -int coroutine_fn bdrv_co_pwritev(BdrvChild *child, - int64_t offset, int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); -int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, - int64_t offset, int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); - -static inline int coroutine_fn bdrv_co_pread(BdrvChild *child, - int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); - - return bdrv_co_preadv(child, offset, bytes, &qiov, flags); -} - -static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child, - int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); - - return bdrv_co_pwritev(child, offset, bytes, &qiov, flags); -} - -extern unsigned int bdrv_drain_all_count; -void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); -void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); - -bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req, - uint64_t align); -BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState 
*bs); - -int get_tmp_filename(char *filename, int size); -BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, - const char *filename); - -void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, - QDict *options); - -/** - * bdrv_add_aio_context_notifier: - * - * If a long-running job intends to be always run in the same AioContext as a - * certain BDS, it may use this function to be notified of changes regarding the - * association of the BDS to an AioContext. - * - * attached_aio_context() is called after the target BDS has been attached to a - * new AioContext; detach_aio_context() is called before the target BDS is being - * detached from its old AioContext. - */ -void bdrv_add_aio_context_notifier(BlockDriverState *bs, - void (*attached_aio_context)(AioContext *new_context, void *opaque), - void (*detach_aio_context)(void *opaque), void *opaque); - -/** - * bdrv_remove_aio_context_notifier: - * - * Unsubscribe of change notifications regarding the BDS's AioContext. The - * parameters given here have to be the same as those given to - * bdrv_add_aio_context_notifier(). - */ -void bdrv_remove_aio_context_notifier(BlockDriverState *bs, - void (*aio_context_attached)(AioContext *, - void *), - void (*aio_context_detached)(void *), - void *opaque); - -/** - * bdrv_wakeup: - * @bs: The BlockDriverState for which an I/O operation has been completed. - * - * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During - * synchronous I/O on a BlockDriverState that is attached to another - * I/O thread, the main thread lets the I/O thread's event loop run, - * waiting for the I/O operation to complete. A bdrv_wakeup will wake - * up the main thread if necessary. - * - * Manual calls to bdrv_wakeup are rarely necessary, because - * bdrv_dec_in_flight already calls it. 
- */ -void bdrv_wakeup(BlockDriverState *bs); - -#ifdef _WIN32 -int is_windows_drive(const char *filename); -#endif - -/** - * stream_start: - * @job_id: The id of the newly-created job, or %NULL to use the - * device name of @bs. - * @bs: Block device to operate on. - * @base: Block device that will become the new base, or %NULL to - * flatten the whole backing file chain onto @bs. - * @backing_file_str: The file name that will be written to @bs as the - * the new backing file if the job completes. Ignored if @base is %NULL. - * @creation_flags: Flags that control the behavior of the Job lifetime. - * See @BlockJobCreateFlags - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @on_error: The action to take upon error. - * @filter_node_name: The node name that should be assigned to the filter - * driver that the stream job inserts into the graph above - * @bs. NULL means that a node name should be autogenerated. - * @errp: Error object. - * - * Start a streaming operation on @bs. Clusters that are unallocated - * in @bs, but allocated in any image between @base and @bs (both - * exclusive) will be written to @bs. At the end of a successful - * streaming job, the backing file of @bs will be changed to - * @backing_file_str in the written image and to @base in the live - * BlockDriverState. - */ -void stream_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *base, const char *backing_file_str, - BlockDriverState *bottom, - int creation_flags, int64_t speed, - BlockdevOnError on_error, - const char *filter_node_name, - Error **errp); - -/** - * commit_start: - * @job_id: The id of the newly-created job, or %NULL to use the - * device name of @bs. - * @bs: Active block device. - * @top: Top block device to be committed. - * @base: Block device that will be written into, and become the new top. - * @creation_flags: Flags that control the behavior of the Job lifetime. 
- * See @BlockJobCreateFlags - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @on_error: The action to take upon error. - * @backing_file_str: String to use as the backing file in @top's overlay - * @filter_node_name: The node name that should be assigned to the filter - * driver that the commit job inserts into the graph above @top. NULL means - * that a node name should be autogenerated. - * @errp: Error object. - * - */ -void commit_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *base, BlockDriverState *top, - int creation_flags, int64_t speed, - BlockdevOnError on_error, const char *backing_file_str, - const char *filter_node_name, Error **errp); -/** - * commit_active_start: - * @job_id: The id of the newly-created job, or %NULL to use the - * device name of @bs. - * @bs: Active block device to be committed. - * @base: Block device that will be written into, and become the new top. - * @creation_flags: Flags that control the behavior of the Job lifetime. - * See @BlockJobCreateFlags - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @on_error: The action to take upon error. - * @filter_node_name: The node name that should be assigned to the filter - * driver that the commit job inserts into the graph above @bs. NULL means that - * a node name should be autogenerated. - * @cb: Completion function for the job. - * @opaque: Opaque pointer value passed to @cb. - * @auto_complete: Auto complete the job. - * @errp: Error object. - * - */ -BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *base, int creation_flags, - int64_t speed, BlockdevOnError on_error, - const char *filter_node_name, - BlockCompletionFunc *cb, void *opaque, - bool auto_complete, Error **errp); -/* - * mirror_start: - * @job_id: The id of the newly-created job, or %NULL to use the - * device name of @bs. - * @bs: Block device to operate on. - * @target: Block device to write to. 
- * @replaces: Block graph node name to replace once the mirror is done. Can - * only be used when full mirroring is selected. - * @creation_flags: Flags that control the behavior of the Job lifetime. - * See @BlockJobCreateFlags - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @granularity: The chosen granularity for the dirty bitmap. - * @buf_size: The amount of data that can be in flight at one time. - * @mode: Whether to collapse all images in the chain to the target. - * @backing_mode: How to establish the target's backing chain after completion. - * @zero_target: Whether the target should be explicitly zero-initialized - * @on_source_error: The action to take upon error reading from the source. - * @on_target_error: The action to take upon error writing to the target. - * @unmap: Whether to unmap target where source sectors only contain zeroes. - * @filter_node_name: The node name that should be assigned to the filter - * driver that the mirror job inserts into the graph above @bs. NULL means that - * a node name should be autogenerated. - * @copy_mode: When to trigger writes to the target. - * @errp: Error object. - * - * Start a mirroring operation on @bs. Clusters that are allocated - * in @bs will be written to @target until the job is cancelled or - * manually completed. At the end of a successful mirroring job, - * @bs will be switched to read from @target. - */ -void mirror_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *target, const char *replaces, - int creation_flags, int64_t speed, - uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, - bool zero_target, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - bool unmap, const char *filter_node_name, - MirrorCopyMode copy_mode, Error **errp); - -/* - * backup_job_create: - * @job_id: The id of the newly-created job, or %NULL to use the - * device name of @bs. 
- * @bs: Block device to operate on. - * @target: Block device to write to. - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @sync_mode: What parts of the disk image should be copied to the destination. - * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental' - * @bitmap_mode: The bitmap synchronization policy to use. - * @perf: Performance options. All actual fields assumed to be present, - * all ".has_*" fields are ignored. - * @on_source_error: The action to take upon error reading from the source. - * @on_target_error: The action to take upon error writing to the target. - * @creation_flags: Flags that control the behavior of the Job lifetime. - * See @BlockJobCreateFlags - * @cb: Completion function for the job. - * @opaque: Opaque pointer value passed to @cb. - * @txn: Transaction that this job is part of (may be NULL). - * - * Create a backup operation on @bs. Clusters in @bs are written to @target - * until the job is cancelled or manually completed. - */ -BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - BlockDriverState *target, int64_t speed, - MirrorSyncMode sync_mode, - BdrvDirtyBitmap *sync_bitmap, - BitmapSyncMode bitmap_mode, - bool compress, - const char *filter_node_name, - BackupPerf *perf, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - int creation_flags, - BlockCompletionFunc *cb, void *opaque, - JobTxn *txn, Error **errp); - -BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - const char *child_name, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - uint64_t perm, uint64_t shared_perm, - void *opaque, Error **errp); -void bdrv_root_unref_child(BdrvChild *child); - -void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, - uint64_t *shared_perm); - -/** - * Sets a BdrvChild's permissions. 
Avoid if the parent is a BDS; use - * bdrv_child_refresh_perms() instead and make the parent's - * .bdrv_child_perm() implementation return the correct values. - */ -int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, - Error **errp); - -/** - * Calls bs->drv->bdrv_child_perm() and updates the child's permission - * masks with the result. - * Drivers should invoke this function whenever an event occurs that - * makes their .bdrv_child_perm() implementation return different - * values than before, but which will not result in the block layer - * automatically refreshing the permissions. - */ -int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp); - -bool bdrv_recurse_can_replace(BlockDriverState *bs, - BlockDriverState *to_replace); - -/* - * Default implementation for BlockDriver.bdrv_child_perm() that can - * be used by block filters and image formats, as long as they use the - * child_of_bds child class and set an appropriate BdrvChildRole. 
- */ -void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, - BdrvChildRole role, BlockReopenQueue *reopen_queue, - uint64_t perm, uint64_t shared, - uint64_t *nperm, uint64_t *nshared); - -const char *bdrv_get_parent_name(const BlockDriverState *bs); -void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); -bool blk_dev_has_removable_media(BlockBackend *blk); -bool blk_dev_has_tray(BlockBackend *blk); -void blk_dev_eject_request(BlockBackend *blk, bool force); -bool blk_dev_is_tray_open(BlockBackend *blk); -bool blk_dev_is_medium_locked(BlockBackend *blk); - -void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); - -void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); -void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup); -bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest, - const BdrvDirtyBitmap *src, - HBitmap **backup, bool lock); - -void bdrv_inc_in_flight(BlockDriverState *bs); -void bdrv_dec_in_flight(BlockDriverState *bs); - -void blockdev_close_all_bdrv_states(void); - -int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset, - BdrvChild *dst, int64_t dst_offset, - int64_t bytes, - BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); -int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset, - BdrvChild *dst, int64_t dst_offset, - int64_t bytes, - BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); - -int refresh_total_sectors(BlockDriverState *bs, int64_t hint); - -void bdrv_set_monitor_owned(BlockDriverState *bs); -BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp); - -/** - * Simple implementation of bdrv_co_create_opts for protocol drivers - * which only support creation via opening a file - * (usually existing raw storage device) - */ -int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, - const char *filename, - QemuOpts *opts, - Error **errp); -extern QemuOptsList 
bdrv_create_opts_simple; - -BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, - const char *name, - BlockDriverState **pbs, - Error **errp); -BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, - BlockDirtyBitmapMergeSourceList *bms, - HBitmap **backup, Error **errp); -BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, - bool release, - BlockDriverState **bitmap_bs, - Error **errp); - -BdrvChild *bdrv_cow_child(BlockDriverState *bs); -BdrvChild *bdrv_filter_child(BlockDriverState *bs); -BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs); -BdrvChild *bdrv_primary_child(BlockDriverState *bs); -BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs); -BlockDriverState *bdrv_skip_filters(BlockDriverState *bs); -BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs); - -static inline BlockDriverState *child_bs(BdrvChild *child) -{ - return child ? child->bs : NULL; -} - -static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs) -{ - return child_bs(bdrv_cow_child(bs)); -} - -static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs) -{ - return child_bs(bdrv_filter_child(bs)); -} - -static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs) -{ - return child_bs(bdrv_filter_or_cow_child(bs)); -} - -static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs) -{ - return child_bs(bdrv_primary_child(bs)); -} - -/** - * End all quiescent sections started by bdrv_drain_all_begin(). This is - * needed when deleting a BDS before bdrv_drain_all_end() is called. - * - * NOTE: this is an internal helper for bdrv_close() *only*. No one else - * should call it. - */ -void bdrv_drain_all_end_quiesce(BlockDriverState *bs); - -/** - * Check whether the given offset is in the cached block-status data - * region. - * - * If it is, and @pnum is not NULL, *pnum is set to - * `bsc.data_end - offset`, i.e. 
how many bytes, starting from - * @offset, are data (according to the cache). - * Otherwise, *pnum is not touched. - */ -bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum); - -/** - * If [offset, offset + bytes) overlaps with the currently cached - * block-status region, invalidate the cache. - * - * (To be used by I/O paths that cause data regions to be zero or - * holes.) - */ -void bdrv_bsc_invalidate_range(BlockDriverState *bs, - int64_t offset, int64_t bytes); - -/** - * Mark the range [offset, offset + bytes) as a data region. - */ -void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); +/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */ #endif /* BLOCK_INT_H */ diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 87fbb3985f..6525e16fd5 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -74,6 +74,13 @@ typedef struct BlockJob { GSList *nodes; } BlockJob; +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + /** * block_job_next: * @job: A block job, or %NULL. @@ -155,6 +162,21 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp); */ void block_job_iostatus_reset(BlockJob *job); +/* + * block_job_get_aio_context: + * + * Returns aio context associated with a block job. + */ +AioContext *block_job_get_aio_context(BlockJob *job); + + +/* + * Common functions that are neither I/O nor Global State. + * + * See include/block/block-common.h for more information about + * the Common API. + */ + /** * block_job_is_internal: * @job: The job to determine if it is user-visible or not. @@ -170,11 +192,4 @@ bool block_job_is_internal(BlockJob *job); */ const BlockJobDriver *block_job_driver(BlockJob *job); -/* - * block_job_get_aio_context: - * - * Returns aio context associated with a block job. 
- */ -AioContext *block_job_get_aio_context(BlockJob *job); - #endif diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 6633d83da2..6bd9ae2b20 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -39,6 +39,13 @@ struct BlockJobDriver { JobDriver job_driver; /* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + + /* * Returns whether the job has pending requests for the child or will * submit new requests before the next pause point. This callback is polled * in the context of draining a job node after requesting that the job be @@ -47,6 +54,13 @@ struct BlockJobDriver { bool (*drained_poll)(BlockJob *job); /* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + + /* * If the callback is not NULL, it will be invoked before the job is * resumed in a new AioContext. This is the place to move any resources * besides job->blk to the new AioContext. @@ -56,6 +70,13 @@ struct BlockJobDriver { void (*set_speed)(BlockJob *job, int64_t speed); }; +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + /** * block_job_create: * @job_id: The id of the newly-created job, or %NULL to have one @@ -98,6 +119,13 @@ void block_job_free(Job *job); */ void block_job_user_resume(Job *job); +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. 
+ */ + /** * block_job_ratelimit_get_delay: * diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 40950ae3d5..6528336c4c 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -77,7 +77,7 @@ void bdrv_dirty_bitmap_set_persistence(BdrvDirtyBitmap *bitmap, bool persistent); void bdrv_dirty_bitmap_set_inconsistent(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_set_busy(BdrvDirtyBitmap *bitmap, bool busy); -void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src, +bool bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src, HBitmap **backup, Error **errp); void bdrv_dirty_bitmap_skip_store(BdrvDirtyBitmap *bitmap, bool skip); bool bdrv_dirty_bitmap_get(BdrvDirtyBitmap *bitmap, int64_t offset); @@ -115,6 +115,8 @@ int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, int64_t offset, bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap, int64_t start, int64_t end, int64_t max_dirty_count, int64_t *dirty_start, int64_t *dirty_count); +bool bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap, int64_t offset, + int64_t bytes, int64_t *count); BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap, Error **errp); diff --git a/include/block/nvme.h b/include/block/nvme.h index cd068ac891..3737351cc8 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -695,7 +695,8 @@ typedef struct QEMU_PACKED NvmeRwCmd { uint8_t flags; uint16_t cid; uint32_t nsid; - uint64_t rsvd2; + uint32_t cdw2; + uint32_t cdw3; uint64_t mptr; NvmeCmdDptr dptr; uint64_t slba; @@ -731,7 +732,6 @@ enum { NVME_RW_PRINFO_PRCHK_APP = 1 << 11, NVME_RW_PRINFO_PRCHK_REF = 1 << 10, NVME_RW_PRINFO_PRCHK_MASK = 7 << 10, - }; #define NVME_RW_PRINFO(control) ((control >> 10) & 0xf) @@ -770,6 +770,7 @@ typedef struct QEMU_PACKED NvmeDsmRange { enum { NVME_COPY_FORMAT_0 = 0x0, + NVME_COPY_FORMAT_1 = 0x1, }; typedef struct QEMU_PACKED NvmeCopyCmd { @@ -777,7 +778,9 @@ typedef struct 
QEMU_PACKED NvmeCopyCmd { uint8_t flags; uint16_t cid; uint32_t nsid; - uint32_t rsvd2[4]; + uint32_t cdw2; + uint32_t cdw3; + uint32_t rsvd2[2]; NvmeCmdDptr dptr; uint64_t sdlba; uint8_t nr; @@ -789,7 +792,7 @@ typedef struct QEMU_PACKED NvmeCopyCmd { uint16_t appmask; } NvmeCopyCmd; -typedef struct QEMU_PACKED NvmeCopySourceRange { +typedef struct QEMU_PACKED NvmeCopySourceRangeFormat0 { uint8_t rsvd0[8]; uint64_t slba; uint16_t nlb; @@ -797,7 +800,17 @@ typedef struct QEMU_PACKED NvmeCopySourceRange { uint32_t reftag; uint16_t apptag; uint16_t appmask; -} NvmeCopySourceRange; +} NvmeCopySourceRangeFormat0; + +typedef struct QEMU_PACKED NvmeCopySourceRangeFormat1 { + uint8_t rsvd0[8]; + uint64_t slba; + uint16_t nlb; + uint8_t rsvd18[8]; + uint8_t sr[10]; + uint16_t apptag; + uint16_t appmask; +} NvmeCopySourceRangeFormat1; enum NvmeAsyncEventRequest { NVME_AER_TYPE_ERROR = 0, @@ -908,6 +921,7 @@ enum NvmeStatusCodes { NVME_CMP_FAILURE = 0x0285, NVME_ACCESS_DENIED = 0x0286, NVME_DULB = 0x0287, + NVME_E2E_STORAGE_TAG_ERROR = 0x0288, NVME_MORE = 0x2000, NVME_DNR = 0x4000, NVME_NO_COMPLETE = 0xffff, @@ -1111,6 +1125,10 @@ enum NvmeIdCtrlOaes { NVME_OAES_NS_ATTR = 1 << 8, }; +enum NvmeIdCtrlCtratt { + NVME_CTRATT_ELBAS = 1 << 15, +}; + enum NvmeIdCtrlOacs { NVME_OACS_SECURITY = 1 << 0, NVME_OACS_FORMAT = 1 << 1, @@ -1131,7 +1149,8 @@ enum NvmeIdCtrlOncs { }; enum NvmeIdCtrlOcfs { - NVME_OCFS_COPY_FORMAT_0 = 1 << 0, + NVME_OCFS_COPY_FORMAT_0 = 1 << NVME_COPY_FORMAT_0, + NVME_OCFS_COPY_FORMAT_1 = 1 << NVME_COPY_FORMAT_1, }; enum NvmeIdctrlVwc { @@ -1216,6 +1235,7 @@ enum NvmeFeatureIds { NVME_WRITE_ATOMICITY = 0xa, NVME_ASYNCHRONOUS_EVENT_CONF = 0xb, NVME_TIMESTAMP = 0xe, + NVME_HOST_BEHAVIOR_SUPPORT = 0x16, NVME_COMMAND_SET_PROFILE = 0x19, NVME_SOFTWARE_PROGRESS_MARKER = 0x80, NVME_FID_MAX = 0x100, @@ -1257,6 +1277,13 @@ typedef struct QEMU_PACKED NvmeRangeType { uint8_t rsvd48[16]; } NvmeRangeType; +typedef struct NvmeHostBehaviorSupport { + uint8_t acre; + uint8_t 
etdas; + uint8_t lbafee; + uint8_t rsvd3[509]; +} NvmeHostBehaviorSupport; + typedef struct QEMU_PACKED NvmeLBAF { uint16_t ms; uint8_t ds; @@ -1270,6 +1297,7 @@ typedef struct QEMU_PACKED NvmeLBAFE { } NvmeLBAFE; #define NVME_NSID_BROADCAST 0xffffffff +#define NVME_MAX_NLBAF 64 typedef struct QEMU_PACKED NvmeIdNs { uint64_t nsze; @@ -1304,11 +1332,20 @@ typedef struct QEMU_PACKED NvmeIdNs { uint8_t rsvd81[23]; uint8_t nguid[16]; uint64_t eui64; - NvmeLBAF lbaf[16]; - uint8_t rsvd192[192]; + NvmeLBAF lbaf[NVME_MAX_NLBAF]; uint8_t vs[3712]; } NvmeIdNs; +#define NVME_ID_NS_NVM_ELBAF_PIF(elbaf) (((elbaf) >> 7) & 0x3) + +typedef struct QEMU_PACKED NvmeIdNsNvm { + uint64_t lbstm; + uint8_t pic; + uint8_t rsvd9[3]; + uint32_t elbaf[NVME_MAX_NLBAF]; + uint8_t rsvd268[3828]; +} NvmeIdNsNvm; + typedef struct QEMU_PACKED NvmeIdNsDescr { uint8_t nidt; uint8_t nidl; @@ -1410,10 +1447,23 @@ enum NvmeIdNsMc { #define NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK) -typedef struct NvmeDifTuple { - uint16_t guard; - uint16_t apptag; - uint32_t reftag; +enum NvmePIFormat { + NVME_PI_GUARD_16 = 0, + NVME_PI_GUARD_64 = 2, +}; + +typedef union NvmeDifTuple { + struct { + uint16_t guard; + uint16_t apptag; + uint32_t reftag; + } g16; + + struct { + uint64_t guard; + uint16_t apptag; + uint8_t sr[6]; + } g64; } NvmeDifTuple; enum NvmeZoneAttr { @@ -1510,7 +1560,8 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeZonedResult) != 8); QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16); QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16); - QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRange) != 32); + QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRangeFormat0) != 32); + QEMU_BUILD_BUG_ON(sizeof(NvmeCopySourceRangeFormat1) != 40); QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeDeleteQ) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCreateCq) != 64); @@ -1520,6 +1571,7 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64); 
QEMU_BUILD_BUG_ON(sizeof(NvmeCopyCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeHostBehaviorSupport) != 512); QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512); QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512); @@ -1530,10 +1582,11 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeLBAF) != 4); QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) != 16); QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096); + QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsNvm) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsZoned) != 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeSglDescriptor) != 16); QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsDescr) != 4); QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64); - QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 8); + QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 16); } #endif diff --git a/include/block/reqlist.h b/include/block/reqlist.h new file mode 100644 index 0000000000..5253497bae --- /dev/null +++ b/include/block/reqlist.h @@ -0,0 +1,75 @@ +/* + * reqlist API + * + * Copyright (C) 2013 Proxmox Server Solutions + * Copyright (c) 2021 Virtuozzo International GmbH. + * + * Authors: + * Dietmar Maurer (dietmar@proxmox.com) + * Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef REQLIST_H +#define REQLIST_H + +#include "qemu/coroutine.h" + +/* + * The API is not thread-safe and shouldn't be. The struct is public to be part + * of other structures and protected by third-party locks, see + * block/block-copy.c for example. + */ + +typedef struct BlockReq { + int64_t offset; + int64_t bytes; + + CoQueue wait_queue; /* coroutines blocked on this req */ + QLIST_ENTRY(BlockReq) list; +} BlockReq; + +typedef QLIST_HEAD(, BlockReq) BlockReqList; + +/* + * Initialize new request and add it to the list. 
Caller must be sure that + * there are no conflicting requests in the list. + */ +void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset, + int64_t bytes); +/* Search for request in the list intersecting with @offset/@bytes area. */ +BlockReq *reqlist_find_conflict(BlockReqList *reqs, int64_t offset, + int64_t bytes); + +/* + * If there are no intersecting requests return false. Otherwise, wait for the + * first found intersecting request to finish and return true. + * + * @lock is passed to qemu_co_queue_wait() + * False return value proves that lock was released at no point. + */ +bool coroutine_fn reqlist_wait_one(BlockReqList *reqs, int64_t offset, + int64_t bytes, CoMutex *lock); + +/* + * Wait for all intersecting requests. It just calls reqlist_wait_one() in a + * loop, caller is responsible to stop producing new requests in this region + * in parallel, otherwise reqlist_wait_all() may never return. + */ +void coroutine_fn reqlist_wait_all(BlockReqList *reqs, int64_t offset, + int64_t bytes, CoMutex *lock); + +/* + * Shrink request and wake all waiting coroutines (maybe some of them are not + * intersecting with shrunk request). + */ +void coroutine_fn reqlist_shrink_req(BlockReq *req, int64_t new_bytes); + +/* + * Remove request and wake all waiting coroutines. Do not release any memory. + */ +void coroutine_fn reqlist_remove_req(BlockReq *req); + +#endif /* REQLIST_H */ diff --git a/include/block/snapshot.h b/include/block/snapshot.h index 940345692f..50ff924710 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -45,6 +45,13 @@ typedef struct QEMUSnapshotInfo { uint64_t icount; /* record/replay step */ } QEMUSnapshotInfo; +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. 
+ */ + int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info, const char *name); bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, @@ -73,9 +80,11 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, Error **errp); -/* Group operations. All block drivers are involved. +/* + * Group operations. All block drivers are involved. * These functions will properly handle dataplane (take aio_context_acquire - * when appropriate for appropriate block drivers */ + * when appropriate for appropriate block drivers + */ bool bdrv_all_can_snapshot(bool has_devices, strList *devices, Error **errp); diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 84caf5c3d9..c0f0fab28a 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -433,10 +433,6 @@ int cpu_exec(CPUState *cpu); void tcg_exec_realizefn(CPUState *cpu, Error **errp); void tcg_exec_unrealizefn(CPUState *cpu); -/* Returns: 0 on success, -1 on error */ -int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, - void *ptr, target_ulong len, bool is_write); - /** * cpu_set_cpustate_pointers(cpu) * @cpu: The cpu object diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index de5f444b19..7f7b5943c7 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -7,6 +7,18 @@ #include "exec/hwaddr.h" #endif +/** + * vaddr: + * Type wide enough to contain any #target_ulong virtual address. + */ +typedef uint64_t vaddr; +#define VADDR_PRId PRId64 +#define VADDR_PRIu PRIu64 +#define VADDR_PRIo PRIo64 +#define VADDR_PRIx PRIx64 +#define VADDR_PRIX PRIX64 +#define VADDR_MAX UINT64_MAX + /* Using intptr_t ensures that qemu_*_page_mask is sign-extended even * when intptr_t is 32-bit and we are aligning a long long. 
*/ @@ -78,6 +90,28 @@ void qemu_ram_unset_migratable(RAMBlock *rb); size_t qemu_ram_pagesize(RAMBlock *block); size_t qemu_ram_pagesize_largest(void); +/** + * cpu_address_space_init: + * @cpu: CPU to add this address space to + * @asidx: integer index of this address space + * @prefix: prefix to be used as name of address space + * @mr: the root memory region of address space + * + * Add the specified address space to the CPU's cpu_ases list. + * The address space added with @asidx 0 is the one used for the + * convenience pointer cpu->as. + * The target-specific code which registers ASes is responsible + * for defining what semantics address space 0, 1, 2, etc have. + * + * Before the first call to this function, the caller must set + * cpu->num_ases to the total number of address spaces it needs + * to support. + * + * Note that with KVM only one address space is supported. + */ +void cpu_address_space_init(CPUState *cpu, int asidx, + const char *prefix, MemoryRegion *mr); + void cpu_physical_memory_rw(hwaddr addr, void *buf, hwaddr len, bool is_write); static inline void cpu_physical_memory_read(hwaddr addr, @@ -90,6 +124,7 @@ static inline void cpu_physical_memory_write(hwaddr addr, { cpu_physical_memory_rw(addr, (void *)buf, len, true); } +void cpu_reloading_memory_map(void); void *cpu_physical_memory_map(hwaddr addr, hwaddr *plen, bool is_write); @@ -116,6 +151,10 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); #endif +/* Returns: 0 on success, -1 on error */ +int cpu_memory_rw_debug(CPUState *cpu, vaddr addr, + void *ptr, size_t len, bool is_write); + /* vl.c */ extern int singlestep; diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index da987fe8ad..6adacf8928 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -64,6 +64,7 @@ #include "exec/memopidx.h" #include "qemu/int128.h" +#include "cpu.h" #if defined(CONFIG_USER_ONLY) /* sparc32plus has 64bit long but 32bit space address diff --git 
a/include/exec/exec-all.h b/include/exec/exec-all.h index 227e10ba56..d2cb0981f4 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -24,7 +24,6 @@ #ifdef CONFIG_TCG #include "exec/cpu_ldst.h" #endif -#include "sysemu/cpu-timers.h" /* allow to see translation results - the slowdown should be negligible, so we leave it */ #define DEBUG_DISAS @@ -81,31 +80,6 @@ static inline bool cpu_loop_exit_requested(CPUState *cpu) return (int32_t)qatomic_read(&cpu_neg(cpu)->icount_decr.u32) < 0; } -#if !defined(CONFIG_USER_ONLY) -void cpu_reloading_memory_map(void); -/** - * cpu_address_space_init: - * @cpu: CPU to add this address space to - * @asidx: integer index of this address space - * @prefix: prefix to be used as name of address space - * @mr: the root memory region of address space - * - * Add the specified address space to the CPU's cpu_ases list. - * The address space added with @asidx 0 is the one used for the - * convenience pointer cpu->as. - * The target-specific code which registers ASes is responsible - * for defining what semantics address space 0, 1, 2, etc have. - * - * Before the first call to this function, the caller must set - * cpu->num_ases to the total number of address spaces it needs - * to support. - * - * Note that with KVM only one address space is supported. 
- */ -void cpu_address_space_init(CPUState *cpu, int asidx, - const char *prefix, MemoryRegion *mr); -#endif - #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) /* cputlb.c */ /** diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h index a024a0350d..89edf94d28 100644 --- a/include/exec/gdbstub.h +++ b/include/exec/gdbstub.h @@ -45,17 +45,6 @@ void gdb_do_syscall(gdb_syscall_complete_cb cb, const char *fmt, ...); */ void gdb_do_syscallv(gdb_syscall_complete_cb cb, const char *fmt, va_list va); int use_gdb_syscalls(void); -void gdb_set_stop_cpu(CPUState *cpu); - -/** - * gdb_exit: exit gdb session, reporting inferior status - * @code: exit code reported - * - * This closes the session and sends a final packet to GDB reporting - * the exit status of the program. It also cleans up any connections - * detritus before returning. - */ -void gdb_exit(int code); #ifdef CONFIG_USER_ONLY /** @@ -165,7 +154,7 @@ static inline uint8_t * gdb_get_reg_ptr(GByteArray *buf, int len) #define ldtul_p(addr) ldl_p(addr) #endif -#endif +#endif /* NEED_CPU_H */ /** * gdbserver_start: start the gdb server @@ -178,6 +167,18 @@ static inline uint8_t * gdb_get_reg_ptr(GByteArray *buf, int len) int gdbserver_start(const char *port_or_device); /** + * gdb_exit: exit gdb session, reporting inferior status + * @code: exit code reported + * + * This closes the session and sends a final packet to GDB reporting + * the exit status of the program. It also cleans up any connections + * detritus before returning. + */ +void gdb_exit(int code); + +void gdb_set_stop_cpu(CPUState *cpu); + +/** * gdb_has_xml: * This is an ugly hack to cope with both new and old gdb. 
* If gdb sends qXfer:features:read then assume we're talking to a newish diff --git a/include/exec/poison.h b/include/exec/poison.h index 7ad4ad18e8..7c5c02f03f 100644 --- a/include/exec/poison.h +++ b/include/exec/poison.h @@ -51,8 +51,6 @@ #pragma GCC poison TARGET_PAGE_BITS #pragma GCC poison TARGET_PAGE_ALIGN -#pragma GCC poison CPUArchState - #pragma GCC poison CPU_INTERRUPT_HARD #pragma GCC poison CPU_INTERRUPT_EXITTB #pragma GCC poison CPU_INTERRUPT_HALT diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index c1ea17d0de..7e76ee2619 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -132,6 +132,7 @@ struct VirtMachineClass { bool no_secure_gpio; /* Machines < 6.2 have no support for describing cpu topology to guest */ bool no_cpu_topology; + bool no_tcg_lpa2; }; struct VirtMachineState { diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 76ab3b851c..0efc6153ed 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -22,6 +22,7 @@ #include "hw/qdev-core.h" #include "disas/dis-asm.h" +#include "exec/cpu-common.h" #include "exec/hwaddr.h" #include "exec/memattrs.h" #include "qapi/qapi-types-run-state.h" @@ -36,18 +37,6 @@ typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size, void *opaque); /** - * vaddr: - * Type wide enough to contain any #target_ulong virtual address. 
- */ -typedef uint64_t vaddr; -#define VADDR_PRId PRId64 -#define VADDR_PRIu PRIu64 -#define VADDR_PRIo PRIo64 -#define VADDR_PRIx PRIx64 -#define VADDR_PRIX PRIX64 -#define VADDR_MAX UINT64_MAX - -/** * SECTION:cpu * @section_id: QEMU-cpu * @title: CPU Class @@ -66,6 +55,24 @@ typedef struct CPUClass CPUClass; DECLARE_CLASS_CHECKERS(CPUClass, CPU, TYPE_CPU) +/** + * OBJECT_DECLARE_CPU_TYPE: + * @CpuInstanceType: instance struct name + * @CpuClassType: class struct name + * @CPU_MODULE_OBJ_NAME: the CPU name in uppercase with underscore separators + * + * This macro is typically used in "cpu-qom.h" header file, and will: + * + * - create the typedefs for the CPU object and class structs + * - register the type for use with g_autoptr + * - provide three standard type cast functions + * + * The object struct and class struct need to be declared manually. + */ +#define OBJECT_DECLARE_CPU_TYPE(CpuInstanceType, CpuClassType, CPU_MODULE_OBJ_NAME) \ + typedef struct ArchCPU CpuInstanceType; \ + OBJECT_DECLARE_TYPE(ArchCPU, CpuClassType, CPU_MODULE_OBJ_NAME); + typedef enum MMUAccessType { MMU_DATA_LOAD = 0, MMU_DATA_STORE = 1, @@ -351,7 +358,7 @@ struct CPUState { AddressSpace *as; MemoryRegion *memory; - void *env_ptr; /* CPUArchState */ + CPUArchState *env_ptr; IcountDecr *icount_decr_ptr; /* Accessed in parallel; all accesses must be atomic */ diff --git a/include/hw/intc/riscv_imsic.h b/include/hw/intc/riscv_imsic.h new file mode 100644 index 0000000000..58c2aaa8dc --- /dev/null +++ b/include/hw/intc/riscv_imsic.h @@ -0,0 +1,68 @@ +/* + * RISC-V IMSIC (Incoming Message Signal Interrupt Controller) interface + * + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef HW_RISCV_IMSIC_H +#define HW_RISCV_IMSIC_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_RISCV_IMSIC "riscv.imsic" + +typedef struct RISCVIMSICState RISCVIMSICState; +DECLARE_INSTANCE_CHECKER(RISCVIMSICState, RISCV_IMSIC, TYPE_RISCV_IMSIC) + +#define IMSIC_MMIO_PAGE_SHIFT 12 +#define IMSIC_MMIO_PAGE_SZ (1UL << IMSIC_MMIO_PAGE_SHIFT) +#define IMSIC_MMIO_SIZE(__num_pages) ((__num_pages) * IMSIC_MMIO_PAGE_SZ) + +#define IMSIC_MMIO_HART_GUEST_MAX_BTIS 6 +#define IMSIC_MMIO_GROUP_MIN_SHIFT 24 + +#define IMSIC_HART_NUM_GUESTS(__guest_bits) \ + (1U << (__guest_bits)) +#define IMSIC_HART_SIZE(__guest_bits) \ + (IMSIC_HART_NUM_GUESTS(__guest_bits) * IMSIC_MMIO_PAGE_SZ) +#define IMSIC_GROUP_NUM_HARTS(__hart_bits) \ + (1U << (__hart_bits)) +#define IMSIC_GROUP_SIZE(__hart_bits, __guest_bits) \ + (IMSIC_GROUP_NUM_HARTS(__hart_bits) * IMSIC_HART_SIZE(__guest_bits)) + +struct RISCVIMSICState { + /*< private >*/ + SysBusDevice parent_obj; + qemu_irq *external_irqs; + + /*< public >*/ + MemoryRegion mmio; + uint32_t num_eistate; + uint32_t *eidelivery; + uint32_t *eithreshold; + uint32_t *eistate; + + /* config */ + bool mmode; + uint32_t hartid; + uint32_t num_pages; + uint32_t num_irqs; +}; + +DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode, + uint32_t num_pages, uint32_t num_ids); + +#endif diff --git a/include/hw/riscv/opentitan.h b/include/hw/riscv/opentitan.h index eac35ef590..00da9ded43 100644 --- a/include/hw/riscv/opentitan.h +++ b/include/hw/riscv/opentitan.h @@ -57,8 +57,10 @@ enum { IBEX_DEV_FLASH, 
IBEX_DEV_FLASH_VIRTUAL, IBEX_DEV_UART, + IBEX_DEV_SPI_DEVICE, + IBEX_DEV_SPI_HOST0, + IBEX_DEV_SPI_HOST1, IBEX_DEV_GPIO, - IBEX_DEV_SPI, IBEX_DEV_I2C, IBEX_DEV_PATTGEN, IBEX_DEV_TIMER, diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h index 6e9f61ccd9..78b058ec86 100644 --- a/include/hw/riscv/virt.h +++ b/include/hw/riscv/virt.h @@ -24,26 +24,36 @@ #include "hw/block/flash.h" #include "qom/object.h" -#define VIRT_CPUS_MAX 32 -#define VIRT_SOCKETS_MAX 8 +#define VIRT_CPUS_MAX_BITS 9 +#define VIRT_CPUS_MAX (1 << VIRT_CPUS_MAX_BITS) +#define VIRT_SOCKETS_MAX_BITS 2 +#define VIRT_SOCKETS_MAX (1 << VIRT_SOCKETS_MAX_BITS) #define TYPE_RISCV_VIRT_MACHINE MACHINE_TYPE_NAME("virt") typedef struct RISCVVirtState RISCVVirtState; DECLARE_INSTANCE_CHECKER(RISCVVirtState, RISCV_VIRT_MACHINE, TYPE_RISCV_VIRT_MACHINE) +typedef enum RISCVVirtAIAType { + VIRT_AIA_TYPE_NONE = 0, + VIRT_AIA_TYPE_APLIC, + VIRT_AIA_TYPE_APLIC_IMSIC, +} RISCVVirtAIAType; + struct RISCVVirtState { /*< private >*/ MachineState parent; /*< public >*/ RISCVHartArrayState soc[VIRT_SOCKETS_MAX]; - DeviceState *plic[VIRT_SOCKETS_MAX]; + DeviceState *irqchip[VIRT_SOCKETS_MAX]; PFlashCFI01 *flash[2]; FWCfgState *fw_cfg; int fdt_size; bool have_aclint; + RISCVVirtAIAType aia_type; + int aia_guests; }; enum { @@ -54,9 +64,13 @@ enum { VIRT_CLINT, VIRT_ACLINT_SSWI, VIRT_PLIC, + VIRT_APLIC_M, + VIRT_APLIC_S, VIRT_UART0, VIRT_VIRTIO, VIRT_FW_CFG, + VIRT_IMSIC_M, + VIRT_IMSIC_S, VIRT_FLASH, VIRT_DRAM, VIRT_PCIE_MMIO, @@ -73,8 +87,13 @@ enum { VIRTIO_NDEV = 0x35 /* Arbitrary maximum number of interrupts */ }; -#define VIRT_PLIC_NUM_SOURCES 127 -#define VIRT_PLIC_NUM_PRIORITIES 7 +#define VIRT_IRQCHIP_IPI_MSI 1 +#define VIRT_IRQCHIP_NUM_MSIS 255 +#define VIRT_IRQCHIP_NUM_SOURCES VIRTIO_NDEV +#define VIRT_IRQCHIP_NUM_PRIO_BITS 3 +#define VIRT_IRQCHIP_MAX_GUESTS_BITS 3 +#define VIRT_IRQCHIP_MAX_GUESTS ((1U << VIRT_IRQCHIP_MAX_GUESTS_BITS) - 1U) + #define VIRT_PLIC_PRIORITY_BASE 0x04 #define 
VIRT_PLIC_PENDING_BASE 0x1000 #define VIRT_PLIC_ENABLE_BASE 0x2000 @@ -86,9 +105,15 @@ enum { #define FDT_PCI_ADDR_CELLS 3 #define FDT_PCI_INT_CELLS 1 -#define FDT_PLIC_ADDR_CELLS 0 #define FDT_PLIC_INT_CELLS 1 -#define FDT_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + 1 + \ - FDT_PLIC_ADDR_CELLS + FDT_PLIC_INT_CELLS) +#define FDT_APLIC_INT_CELLS 2 +#define FDT_IMSIC_INT_CELLS 0 +#define FDT_MAX_INT_CELLS 2 +#define FDT_MAX_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \ + 1 + FDT_MAX_INT_CELLS) +#define FDT_PLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \ + 1 + FDT_PLIC_INT_CELLS) +#define FDT_APLIC_INT_MAP_WIDTH (FDT_PCI_ADDR_CELLS + FDT_PCI_INT_CELLS + \ + 1 + FDT_APLIC_INT_CELLS) #endif diff --git a/include/qemu-common.h b/include/qemu-common.h index 68b2e3bc10..8c0d9ab0f7 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -26,8 +26,6 @@ int qemu_main(int argc, char **argv, char **envp); #endif -void *qemu_oom_check(void *ptr); - ssize_t qemu_write_full(int fd, const void *buf, size_t count) QEMU_WARN_UNUSED_RESULT; diff --git a/include/qemu/coroutine-tls.h b/include/qemu/coroutine-tls.h new file mode 100644 index 0000000000..1558a826aa --- /dev/null +++ b/include/qemu/coroutine-tls.h @@ -0,0 +1,165 @@ +/* + * QEMU Thread Local Storage for coroutines + * + * Copyright Red Hat + * + * SPDX-License-Identifier: LGPL-2.1-or-later + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + * It is forbidden to access Thread Local Storage in coroutines because + * compiler optimizations may cause values to be cached across coroutine + * re-entry. Coroutines can run in more than one thread through the course of + * their life, leading to bugs when stale TLS values from the wrong thread are + * used as a result of compiler optimization. 
+ * + * An example is: + * + * .. code-block:: c + * :caption: A coroutine that may see the wrong TLS value + * + * static __thread AioContext *current_aio_context; + * ... + * static void coroutine_fn foo(void) + * { + * aio_notify(current_aio_context); + * qemu_coroutine_yield(); + * aio_notify(current_aio_context); // <-- may be stale after yielding! + * } + * + * This header provides macros for safely defining variables in Thread Local + * Storage: + * + * .. code-block:: c + * :caption: A coroutine that safely uses TLS + * + * QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context) + * ... + * static void coroutine_fn foo(void) + * { + * aio_notify(get_current_aio_context()); + * qemu_coroutine_yield(); + * aio_notify(get_current_aio_context()); // <-- safe + * } + */ + +#ifndef QEMU_COROUTINE_TLS_H +#define QEMU_COROUTINE_TLS_H + +/* + * To stop the compiler from caching TLS values we define accessor functions + * with __attribute__((noinline)) plus asm volatile("") to prevent + * optimizations that override noinline. + * + * The compiler can still analyze noinline code and make optimizations based on + * that knowledge, so an inline asm output operand is used to prevent + * optimizations that make assumptions about the address of the TLS variable. + * + * This is fragile and ultimately needs to be solved by a mechanism that is + * guaranteed to work by the compiler (e.g. stackless coroutines), but for now + * we use this approach to prevent issues. + */ + +/** + * QEMU_DECLARE_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Declare an extern variable in Thread Local Storage from a header file: + * + * .. code-block:: c + * :caption: Declaring an extern variable in Thread Local Storage + * + * QEMU_DECLARE_CO_TLS(int, my_count) + * ... 
+ * int c = get_my_count(); + * set_my_count(c + 1); + * *get_ptr_my_count() = 0; + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Declaring a TLS variable using __thread + * + * extern __thread int my_count; + * ... + * int c = my_count; + * my_count = c + 1; + * *(&my_count) = 0; + */ +#define QEMU_DECLARE_CO_TLS(type, var) \ + __attribute__((noinline)) type get_##var(void); \ + __attribute__((noinline)) void set_##var(type v); \ + __attribute__((noinline)) type *get_ptr_##var(void); + +/** + * QEMU_DEFINE_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Define a variable in Thread Local Storage that was previously declared from + * a header file with QEMU_DECLARE_CO_TLS(): + * + * .. code-block:: c + * :caption: Defining a variable in Thread Local Storage + * + * QEMU_DEFINE_CO_TLS(int, my_count) + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Defining a TLS variable using __thread + * + * __thread int my_count; + */ +#define QEMU_DEFINE_CO_TLS(type, var) \ + static __thread type co_tls_##var; \ + type get_##var(void) { asm volatile(""); return co_tls_##var; } \ + void set_##var(type v) { asm volatile(""); co_tls_##var = v; } \ + type *get_ptr_##var(void) \ + { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; } + +/** + * QEMU_DEFINE_STATIC_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Define a static variable in Thread Local Storage: + * + * .. code-block:: c + * :caption: Defining a static variable in Thread Local Storage + * + * QEMU_DEFINE_STATIC_CO_TLS(int, my_count) + * ... 
+ * int c = get_my_count(); + * set_my_count(c + 1); + * *get_ptr_my_count() = 0; + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Defining a static TLS variable using __thread + * + * static __thread int my_count; + * ... + * int c = my_count; + * my_count = c + 1; + * *(&my_count) = 0; + */ +#define QEMU_DEFINE_STATIC_CO_TLS(type, var) \ + static __thread type co_tls_##var; \ + static __attribute__((noinline, unused)) \ + type get_##var(void) \ + { asm volatile(""); return co_tls_##var; } \ + static __attribute__((noinline, unused)) \ + void set_##var(type v) \ + { asm volatile(""); co_tls_##var = v; } \ + static __attribute__((noinline, unused)) \ + type *get_ptr_##var(void) \ + { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; } + +#endif /* QEMU_COROUTINE_TLS_H */ diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h index 09fc245b91..7adb12d320 100644 --- a/include/qemu/cpuid.h +++ b/include/qemu/cpuid.h @@ -45,12 +45,26 @@ #ifndef bit_AVX2 #define bit_AVX2 (1 << 5) #endif -#ifndef bit_AVX512F -#define bit_AVX512F (1 << 16) -#endif #ifndef bit_BMI2 #define bit_BMI2 (1 << 8) #endif +#ifndef bit_AVX512F +#define bit_AVX512F (1 << 16) +#endif +#ifndef bit_AVX512DQ +#define bit_AVX512DQ (1 << 17) +#endif +#ifndef bit_AVX512BW +#define bit_AVX512BW (1 << 30) +#endif +#ifndef bit_AVX512VL +#define bit_AVX512VL (1u << 31) +#endif + +/* Leaf 7, %ecx */ +#ifndef bit_AVX512VBMI2 +#define bit_AVX512VBMI2 (1 << 6) +#endif /* Leaf 0x80000001, %ecx */ #ifndef bit_LZCNT diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h index 5e71b6d6f7..5bd986aa44 100644 --- a/include/qemu/hbitmap.h +++ b/include/qemu/hbitmap.h @@ -340,6 +340,18 @@ bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t start, int64_t end, int64_t max_dirty_count, int64_t *dirty_start, int64_t *dirty_count); +/* + * hbitmap_status: + * @hb: The HBitmap to 
operate on + * @start: The bit to start from + * @count: Number of bits to process + * @pnum: Out-parameter. How many bits have the same value starting from @start + * + * Returns true if bitmap is dirty at @start, false otherwise. + */ +bool hbitmap_status(const HBitmap *hb, int64_t start, int64_t count, + int64_t *pnum); + /** * hbitmap_iter_next: * @hbi: HBitmapIter to operate on. diff --git a/include/qemu/job.h b/include/qemu/job.h index 6e67b6977f..c105b31076 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -169,6 +169,12 @@ typedef struct Job { * Callbacks and other information about a Job driver. */ struct JobDriver { + + /* + * These fields are initialized when this object is created, + * and are never changed afterwards + */ + /** Derived Job struct size */ size_t instance_size; @@ -184,9 +190,18 @@ struct JobDriver { * aborted. If it returns zero, the job moves into the WAITING state. If it * is the last job to complete in its transaction, all jobs in the * transaction move from WAITING to PENDING. + * + * This callback must be run in the job's context. */ int coroutine_fn (*run)(Job *job, Error **errp); + /* + * Functions run without regard to the BQL that may run in any + * arbitrary thread. These functions do not need to be thread-safe + * because the caller ensures that they are invoked from one + * thread at a time. + */ + /** * If the callback is not NULL, it will be invoked when the job transitions * into the paused state. Paused jobs must not perform any asynchronous @@ -201,6 +216,13 @@ struct JobDriver { */ void coroutine_fn (*resume)(Job *job); + /* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + /** * Called when the job is resumed by the user (i.e. user_paused becomes * false). .user_resume is called before .resume. 
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index 8dbc6fcb89..7a4d6a0920 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -242,10 +242,52 @@ AioContext *iohandler_get_aio_context(void); * must always be taken outside other locks. This function helps * functions take different paths depending on whether the current * thread is running within the main loop mutex. + * + * This function should never be used in the block layer, because + * unit tests, block layer tools and qemu-storage-daemon do not + * have a BQL. + * Please instead refer to qemu_in_main_thread(). */ bool qemu_mutex_iothread_locked(void); /** + * qemu_in_main_thread: return whether it's possible to safely access + * the global state of the block layer. + * + * Global state of the block layer is not accessible from I/O threads + * or worker threads; only from threads that "own" the default + * AioContext that qemu_get_aio_context() returns. For tests, block + * layer tools and qemu-storage-daemon there is a designated thread that + * runs the event loop for qemu_get_aio_context(), and that is the + * main thread. + * + * For emulators, however, any thread that holds the BQL can act + * as the block layer main thread; this will be any of the actual + * main thread, the vCPU threads or the RCU thread. + * + * For clarity, do not use this function outside the block layer. + */ +bool qemu_in_main_thread(void); + +/* Mark and check that the function is part of the global state API. */ +#define GLOBAL_STATE_CODE() \ + do { \ + assert(qemu_in_main_thread()); \ + } while (0) + +/* Mark and check that the function is part of the I/O API. */ +#define IO_CODE() \ + do { \ + /* nop */ \ + } while (0) + +/* Mark and check that the function is part of the "I/O OR GS" API. */ +#define IO_OR_GS_CODE() \ + do { \ + /* nop */ \ + } while (0) + +/** * qemu_mutex_lock_iothread: Lock the main loop mutex. * * This function locks the main loop mutex. 
The mutex is taken by diff --git a/include/qemu/memalign.h b/include/qemu/memalign.h new file mode 100644 index 0000000000..fa299f3bf6 --- /dev/null +++ b/include/qemu/memalign.h @@ -0,0 +1,61 @@ +/* + * Allocation and free functions for aligned memory + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_MEMALIGN_H +#define QEMU_MEMALIGN_H + +/** + * qemu_try_memalign: Allocate aligned memory + * @alignment: required alignment, in bytes + * @size: size of allocation, in bytes + * + * Allocate memory on an aligned boundary (i.e. the returned + * address will be an exact multiple of @alignment). + * @alignment must be a power of 2, or the function will assert(). + * On success, returns allocated memory; on failure, returns NULL. + * + * The memory allocated through this function must be freed via + * qemu_vfree() (and not via free()). + */ +void *qemu_try_memalign(size_t alignment, size_t size); +/** + * qemu_memalign: Allocate aligned memory, without failing + * @alignment: required alignment, in bytes + * @size: size of allocation, in bytes + * + * Allocate memory in the same way as qemu_try_memalign(), but + * abort() with an error message if the memory allocation fails. + * + * The memory allocated through this function must be freed via + * qemu_vfree() (and not via free()). + */ +void *qemu_memalign(size_t alignment, size_t size); +/** + * qemu_vfree: Free memory allocated through qemu_memalign + * @ptr: memory to free + * + * This function must be used to free memory allocated via qemu_memalign() + * or qemu_try_memalign(). (Using the wrong free function will cause + * subtle bugs on Windows hosts.) + */ +void qemu_vfree(void *ptr); +/* + * It's an analog of GLIB's g_autoptr_cleanup_generic_gfree(), used to define + * g_autofree macro. 
+ */ +static inline void qemu_cleanup_generic_vfree(void *p) +{ + void **pp = (void **)p; + qemu_vfree(*pp); +} + +/* + * Analog of g_autofree, but qemu_vfree is called on cleanup instead of g_free. + */ +#define QEMU_AUTO_VFREE __attribute__((cleanup(qemu_cleanup_generic_vfree))) + +#endif diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 7bcce3bceb..c9ec7830c9 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -379,28 +379,10 @@ extern "C" { #endif int qemu_daemon(int nochdir, int noclose); -void *qemu_try_memalign(size_t alignment, size_t size); -void *qemu_memalign(size_t alignment, size_t size); void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared, bool noreserve); -void qemu_vfree(void *ptr); void qemu_anon_ram_free(void *ptr, size_t size); -/* - * It's an analog of GLIB's g_autoptr_cleanup_generic_gfree(), used to define - * g_autofree macro. - */ -static inline void qemu_cleanup_generic_vfree(void *p) -{ - void **pp = (void **)p; - qemu_vfree(*pp); -} - -/* - * Analog of g_autofree, but qemu_vfree is called on cleanup instead of g_free. - */ -#define QEMU_AUTO_VFREE __attribute__((cleanup(qemu_cleanup_generic_vfree))) - #ifdef _WIN32 #define HAVE_CHARDEV_SERIAL 1 #elif defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \ @@ -673,19 +655,6 @@ static inline int platform_does_not_support_system(const char *command) } #endif /* !HAVE_SYSTEM_FUNCTION */ -/** - * Duplicate directory entry @dent. - * - * It is highly recommended to use this function instead of open coding - * duplication of @c dirent objects, because the actual @c struct @c dirent - * size may be bigger or shorter than @c sizeof(struct dirent) and correct - * handling is platform specific (see gitlab issue #841). 
- * - * @dent - original directory entry to be duplicated - * @returns duplicated directory entry which should be freed with g_free() - */ -struct dirent *qemu_dirent_dup(struct dirent *dent); - #ifdef __cplusplus } #endif diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index e69efbd47f..b063c6fde8 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -29,6 +29,7 @@ #include "qemu/atomic.h" #include "qemu/notify.h" #include "qemu/sys_membarrier.h" +#include "qemu/coroutine-tls.h" #ifdef __cplusplus extern "C" { @@ -76,11 +77,11 @@ struct rcu_reader_data { NotifierList force_rcu; }; -extern __thread struct rcu_reader_data rcu_reader; +QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader) static inline void rcu_read_lock(void) { - struct rcu_reader_data *p_rcu_reader = &rcu_reader; + struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader(); unsigned ctr; if (p_rcu_reader->depth++ > 0) { @@ -96,7 +97,7 @@ static inline void rcu_read_lock(void) static inline void rcu_read_unlock(void) { - struct rcu_reader_data *p_rcu_reader = &rcu_reader; + struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader(); assert(p_rcu_reader->depth != 0); if (--p_rcu_reader->depth > 0) { diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 5b302cb214..42f4ceb701 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -26,6 +26,7 @@ typedef struct AddressSpace AddressSpace; typedef struct AioContext AioContext; typedef struct Aml Aml; typedef struct AnnounceTimer AnnounceTimer; +typedef struct ArchCPU ArchCPU; typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; typedef struct BdrvDirtyBitmapIter BdrvDirtyBitmapIter; typedef struct BlockBackend BlockBackend; @@ -39,6 +40,7 @@ typedef struct CompatProperty CompatProperty; typedef struct CoMutex CoMutex; typedef struct ConfidentialGuestSupport ConfidentialGuestSupport; typedef struct CPUAddressSpace CPUAddressSpace; +typedef struct CPUArchState CPUArchState; typedef struct CPUState CPUState; typedef 
struct DeviceListener DeviceListener; typedef struct DeviceState DeviceState; diff --git a/include/qemu/xattr.h b/include/qemu/xattr.h index a83fe8e749..f1d0f7be74 100644 --- a/include/qemu/xattr.h +++ b/include/qemu/xattr.h @@ -22,7 +22,9 @@ #ifdef CONFIG_LIBATTR # include <attr/xattr.h> #else -# define ENOATTR ENODATA +# if !defined(ENOATTR) +# define ENOATTR ENODATA +# endif # include <sys/xattr.h> #endif diff --git a/include/sysemu/accel-ops.h b/include/sysemu/accel-ops.h index 032f6979d7..6013c9444c 100644 --- a/include/sysemu/accel-ops.h +++ b/include/sysemu/accel-ops.h @@ -28,8 +28,11 @@ struct AccelOpsClass { /* initialization function called when accel is chosen */ void (*ops_init)(AccelOpsClass *ops); + bool (*cpus_are_resettable)(void); + void (*create_vcpu_thread)(CPUState *cpu); /* MANDATORY NON-NULL */ void (*kick_vcpu_thread)(CPUState *cpu); + bool (*cpu_thread_is_idle)(CPUState *cpu); void (*synchronize_post_reset)(CPUState *cpu); void (*synchronize_post_init)(CPUState *cpu); diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h index 70c579560a..79c2591425 100644 --- a/include/sysemu/arch_init.h +++ b/include/sysemu/arch_init.h @@ -28,4 +28,6 @@ enum { extern const uint32_t arch_type; +void qemu_init_arch_modules(void); + #endif diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h new file mode 100644 index 0000000000..2391679c56 --- /dev/null +++ b/include/sysemu/block-backend-common.h @@ -0,0 +1,102 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_COMMON_H +#define BLOCK_BACKEND_COMMON_H + +#include "qemu/iov.h" +#include "block/throttle-groups.h" + +/* + * TODO Have to include block/block.h for a bunch of block layer + * types. 
Unfortunately, this pulls in the whole BlockDriverState + * API, which we don't want used by many BlockBackend users. Some of + * the types belong here, and the rest should be split into a common + * header and one for the BlockDriverState API. + */ +#include "block/block.h" + +/* Callbacks for block device models */ +typedef struct BlockDevOps { + + /* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + + /* + * Runs when virtual media changed (monitor commands eject, change) + * Argument load is true on load and false on eject. + * Beware: doesn't run when a host device's physical media + * changes. Sure would be useful if it did. + * Device models with removable media must implement this callback. + */ + void (*change_media_cb)(void *opaque, bool load, Error **errp); + /* + * Runs when an eject request is issued from the monitor, the tray + * is closed, and the medium is locked. + * Device models that do not implement is_medium_locked will not need + * this callback. Device models that can lock the medium or tray might + * want to implement the callback and unlock the tray when "force" is + * true, even if they do not support eject requests. + */ + void (*eject_request_cb)(void *opaque, bool force); + + /* + * Is the virtual medium locked into the device? + * Device models implement this only when device has such a lock. + */ + bool (*is_medium_locked)(void *opaque); + + /* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. + */ + + /* + * Is the virtual tray open? + * Device models implement this only when the device has a tray. + */ + bool (*is_tray_open)(void *opaque); + + /* + * Runs when the size changed (e.g. monitor command block_resize) + */ + void (*resize_cb)(void *opaque); + /* + * Runs when the backend receives a drain request. 
+ */ + void (*drained_begin)(void *opaque); + /* + * Runs when the backend's last drain request ends. + */ + void (*drained_end)(void *opaque); + /* + * Is the device still busy? + */ + bool (*drained_poll)(void *opaque); +} BlockDevOps; + +/* + * This struct is embedded in (the private) BlockBackend struct and contains + * fields that must be public. This is in particular for QLIST_ENTRY() and + * friends so that BlockBackends can be kept in lists outside block-backend.c + */ +typedef struct BlockBackendPublic { + ThrottleGroupMember throttle_group_member; +} BlockBackendPublic; + +#endif /* BLOCK_BACKEND_COMMON_H */ diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h new file mode 100644 index 0000000000..2e93a74679 --- /dev/null +++ b/include/sysemu/block-backend-global-state.h @@ -0,0 +1,116 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_GS_H +#define BLOCK_BACKEND_GS_H + +#include "block-backend-common.h" + +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. 
+ */ + +BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm); +BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, Error **errp); +BlockBackend *blk_new_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp); +int blk_get_refcnt(BlockBackend *blk); +void blk_ref(BlockBackend *blk); +void blk_unref(BlockBackend *blk); +void blk_remove_all_bs(void); +BlockBackend *blk_by_name(const char *name); +BlockBackend *blk_next(BlockBackend *blk); +BlockBackend *blk_all_next(BlockBackend *blk); +bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp); +void monitor_remove_blk(BlockBackend *blk); + +BlockBackendPublic *blk_get_public(BlockBackend *blk); +BlockBackend *blk_by_public(BlockBackendPublic *public); + +void blk_remove_bs(BlockBackend *blk); +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); +int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp); +bool bdrv_has_blk(BlockDriverState *bs); +bool bdrv_is_root_node(BlockDriverState *bs); +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp); +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); + +void blk_iostatus_enable(BlockBackend *blk); +BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); +void blk_iostatus_disable(BlockBackend *blk); +void blk_iostatus_reset(BlockBackend *blk); +int blk_attach_dev(BlockBackend *blk, DeviceState *dev); +void blk_detach_dev(BlockBackend *blk, DeviceState *dev); +DeviceState *blk_get_attached_dev(BlockBackend *blk); +BlockBackend *blk_by_dev(void *dev); +BlockBackend *blk_by_qdev_id(const char *id, Error **errp); +void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); + +void blk_activate(BlockBackend *blk, Error **errp); + +int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); +void blk_aio_cancel(BlockAIOCB *acb); +int 
blk_commit_all(void); +void blk_drain(BlockBackend *blk); +void blk_drain_all(void); +void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error); +bool blk_supports_write_perm(BlockBackend *blk); +bool blk_is_sg(BlockBackend *blk); +void blk_set_enable_write_cache(BlockBackend *blk, bool wce); +int blk_get_flags(BlockBackend *blk); +bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp); +void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason); +void blk_op_block_all(BlockBackend *blk, Error *reason); +void blk_op_unblock_all(BlockBackend *blk, Error *reason); +int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + Error **errp); +void blk_add_aio_context_notifier(BlockBackend *blk, + void (*attached_aio_context)(AioContext *new_context, void *opaque), + void (*detach_aio_context)(void *opaque), void *opaque); +void blk_remove_aio_context_notifier(BlockBackend *blk, + void (*attached_aio_context)(AioContext *, + void *), + void (*detach_aio_context)(void *), + void *opaque); +void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify); +void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify); +BlockBackendRootState *blk_get_root_state(BlockBackend *blk); +void blk_update_root_state(BlockBackend *blk); +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk); +int blk_get_open_flags_from_root_state(BlockBackend *blk); + +int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, + int64_t pos, int size); +int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); +int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz); +int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo); + +void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg); +void blk_io_limits_disable(BlockBackend *blk); +void blk_io_limits_enable(BlockBackend *blk, const char *group); +void blk_io_limits_update_group(BlockBackend *blk, const char 
*group); +void blk_set_force_allow_inactivate(BlockBackend *blk); + +void blk_register_buf(BlockBackend *blk, void *host, size_t size); +void blk_unregister_buf(BlockBackend *blk, void *host); + +const BdrvChild *blk_root(BlockBackend *blk); + +int blk_make_empty(BlockBackend *blk, Error **errp); + +#endif /* BLOCK_BACKEND_GS_H */ diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h new file mode 100644 index 0000000000..6517c39295 --- /dev/null +++ b/include/sysemu/block-backend-io.h @@ -0,0 +1,161 @@ +/* + * QEMU Block backends + * + * Copyright (C) 2014-2016 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 + * or later. See the COPYING.LIB file in the top-level directory. + */ + +#ifndef BLOCK_BACKEND_IO_H +#define BLOCK_BACKEND_IO_H + +#include "block-backend-common.h" + +/* + * I/O API functions. These functions are thread-safe. + * + * See include/block/block-io.h for more information about + * the I/O API. 
+ */ + +const char *blk_name(const BlockBackend *blk); + +BlockDriverState *blk_bs(BlockBackend *blk); + +void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); +void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow); +void blk_set_disable_request_queuing(BlockBackend *blk, bool disable); +bool blk_iostatus_is_enabled(const BlockBackend *blk); + +char *blk_get_attached_dev_id(BlockBackend *blk); + +BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); + +BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, + QEMUIOVector *qiov, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_flush(BlockBackend *blk, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes, + BlockCompletionFunc *cb, void *opaque); +void blk_aio_cancel_async(BlockAIOCB *acb); +BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); + +void blk_inc_in_flight(BlockBackend *blk); +void blk_dec_in_flight(BlockBackend *blk); +bool blk_is_inserted(BlockBackend *blk); +bool blk_is_available(BlockBackend *blk); +void blk_lock_medium(BlockBackend *blk, bool locked); +void blk_eject(BlockBackend *blk, bool eject_flag); +int64_t blk_getlength(BlockBackend *blk); +void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); +int64_t blk_nb_sectors(BlockBackend *blk); +void *blk_try_blockalign(BlockBackend *blk, size_t size); +void *blk_blockalign(BlockBackend *blk, size_t size); +bool blk_is_writable(BlockBackend *blk); +bool blk_enable_write_cache(BlockBackend *blk); +BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read); +BlockErrorAction 
blk_get_error_action(BlockBackend *blk, bool is_read, + int error); +void blk_error_action(BlockBackend *blk, BlockErrorAction action, + bool is_read, int error); +void blk_iostatus_set_err(BlockBackend *blk, int error); +int blk_get_max_iov(BlockBackend *blk); +int blk_get_max_hw_iov(BlockBackend *blk); +void blk_set_guest_block_size(BlockBackend *blk, int align); + +void blk_io_plug(BlockBackend *blk); +void blk_io_unplug(BlockBackend *blk); +AioContext *blk_get_aio_context(BlockBackend *blk); +BlockAcctStats *blk_get_stats(BlockBackend *blk); +void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret); + +uint32_t blk_get_request_alignment(BlockBackend *blk); +uint32_t blk_get_max_transfer(BlockBackend *blk); +uint64_t blk_get_max_hw_transfer(BlockBackend *blk); + +int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, + BlockBackend *blk_out, int64_t off_out, + int64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + + +/* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. + * + * See include/block/block-io.h for more information about + * the "I/O or GS" API. 
+ */ + +int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes); +int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, + BdrvRequestFlags flags); +int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, + int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); +int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); + +static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, + int64_t bytes, void *buf, + BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); + + assert(bytes <= SIZE_MAX); + + return blk_co_preadv(blk, offset, bytes, &qiov, flags); +} + +static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, + int64_t bytes, void *buf, + BdrvRequestFlags flags) +{ + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + IO_OR_GS_CODE(); + + assert(bytes <= SIZE_MAX); + + return blk_co_pwritev(blk, offset, bytes, &qiov, flags); +} + +int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes); + +int coroutine_fn blk_co_flush(BlockBackend *blk); +int blk_flush(BlockBackend *blk); + +int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); + +int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, + int64_t bytes); +int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); +int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, BdrvRequestFlags flags); +int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, BdrvRequestFlags flags); +int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, + PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); + 
+#endif /* BLOCK_BACKEND_IO_H */ diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index e5e1524f06..038be9fc40 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -13,272 +13,9 @@ #ifndef BLOCK_BACKEND_H #define BLOCK_BACKEND_H -#include "qemu/iov.h" -#include "block/throttle-groups.h" +#include "block-backend-global-state.h" +#include "block-backend-io.h" -/* - * TODO Have to include block/block.h for a bunch of block layer - * types. Unfortunately, this pulls in the whole BlockDriverState - * API, which we don't want used by many BlockBackend users. Some of - * the types belong here, and the rest should be split into a common - * header and one for the BlockDriverState API. - */ -#include "block/block.h" - -/* Callbacks for block device models */ -typedef struct BlockDevOps { - /* - * Runs when virtual media changed (monitor commands eject, change) - * Argument load is true on load and false on eject. - * Beware: doesn't run when a host device's physical media - * changes. Sure would be useful if it did. - * Device models with removable media must implement this callback. - */ - void (*change_media_cb)(void *opaque, bool load, Error **errp); - /* - * Runs when an eject request is issued from the monitor, the tray - * is closed, and the medium is locked. - * Device models that do not implement is_medium_locked will not need - * this callback. Device models that can lock the medium or tray might - * want to implement the callback and unlock the tray when "force" is - * true, even if they do not support eject requests. - */ - void (*eject_request_cb)(void *opaque, bool force); - /* - * Is the virtual tray open? - * Device models implement this only when the device has a tray. - */ - bool (*is_tray_open)(void *opaque); - /* - * Is the virtual medium locked into the device? - * Device models implement this only when device has such a lock. 
- */ - bool (*is_medium_locked)(void *opaque); - /* - * Runs when the size changed (e.g. monitor command block_resize) - */ - void (*resize_cb)(void *opaque); - /* - * Runs when the backend receives a drain request. - */ - void (*drained_begin)(void *opaque); - /* - * Runs when the backend's last drain request ends. - */ - void (*drained_end)(void *opaque); - /* - * Is the device still busy? - */ - bool (*drained_poll)(void *opaque); -} BlockDevOps; - -/* This struct is embedded in (the private) BlockBackend struct and contains - * fields that must be public. This is in particular for QLIST_ENTRY() and - * friends so that BlockBackends can be kept in lists outside block-backend.c - * */ -typedef struct BlockBackendPublic { - ThrottleGroupMember throttle_group_member; -} BlockBackendPublic; - -BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm); -BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, - uint64_t shared_perm, Error **errp); -BlockBackend *blk_new_open(const char *filename, const char *reference, - QDict *options, int flags, Error **errp); -int blk_get_refcnt(BlockBackend *blk); -void blk_ref(BlockBackend *blk); -void blk_unref(BlockBackend *blk); -void blk_remove_all_bs(void); -const char *blk_name(const BlockBackend *blk); -BlockBackend *blk_by_name(const char *name); -BlockBackend *blk_next(BlockBackend *blk); -BlockBackend *blk_all_next(BlockBackend *blk); -bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp); -void monitor_remove_blk(BlockBackend *blk); - -BlockBackendPublic *blk_get_public(BlockBackend *blk); -BlockBackend *blk_by_public(BlockBackendPublic *public); - -BlockDriverState *blk_bs(BlockBackend *blk); -void blk_remove_bs(BlockBackend *blk); -int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); -int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp); -bool bdrv_has_blk(BlockDriverState *bs); -bool bdrv_is_root_node(BlockDriverState *bs); 
-int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, - Error **errp); -void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); - -void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); -void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow); -void blk_set_disable_request_queuing(BlockBackend *blk, bool disable); -void blk_iostatus_enable(BlockBackend *blk); -bool blk_iostatus_is_enabled(const BlockBackend *blk); -BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); -void blk_iostatus_disable(BlockBackend *blk); -void blk_iostatus_reset(BlockBackend *blk); -void blk_iostatus_set_err(BlockBackend *blk, int error); -int blk_attach_dev(BlockBackend *blk, DeviceState *dev); -void blk_detach_dev(BlockBackend *blk, DeviceState *dev); -DeviceState *blk_get_attached_dev(BlockBackend *blk); -char *blk_get_attached_dev_id(BlockBackend *blk); -BlockBackend *blk_by_dev(void *dev); -BlockBackend *blk_by_qdev_id(const char *id, Error **errp); -void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); -int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); -int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, - int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, - BdrvRequestFlags flags); -int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); - -static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, - int64_t bytes, void *buf, - BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); - - assert(bytes <= SIZE_MAX); - - return blk_co_preadv(blk, offset, bytes, &qiov, flags); -} - -static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, - int64_t bytes, void *buf, - BdrvRequestFlags flags) -{ - QEMUIOVector qiov = 
QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); - - assert(bytes <= SIZE_MAX); - - return blk_co_pwritev(blk, offset, bytes, &qiov, flags); -} - -int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int64_t bytes, BdrvRequestFlags flags); -BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int64_t bytes, BdrvRequestFlags flags, - BlockCompletionFunc *cb, void *opaque); -int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); -int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes); -int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, - BdrvRequestFlags flags); -int64_t blk_getlength(BlockBackend *blk); -void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr); -int64_t blk_nb_sectors(BlockBackend *blk); -BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, - QEMUIOVector *qiov, BdrvRequestFlags flags, - BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, - QEMUIOVector *qiov, BdrvRequestFlags flags, - BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *blk_aio_flush(BlockBackend *blk, - BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes, - BlockCompletionFunc *cb, void *opaque); -void blk_aio_cancel(BlockAIOCB *acb); -void blk_aio_cancel_async(BlockAIOCB *acb); -int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); -BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, - BlockCompletionFunc *cb, void *opaque); -int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, - int64_t bytes); -int coroutine_fn blk_co_flush(BlockBackend *blk); -int blk_flush(BlockBackend *blk); -int blk_commit_all(void); -void blk_inc_in_flight(BlockBackend *blk); -void blk_dec_in_flight(BlockBackend *blk); -void blk_drain(BlockBackend *blk); -void blk_drain_all(void); -void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, 
- BlockdevOnError on_write_error); -BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read); -BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, - int error); -void blk_error_action(BlockBackend *blk, BlockErrorAction action, - bool is_read, int error); -bool blk_supports_write_perm(BlockBackend *blk); -bool blk_is_writable(BlockBackend *blk); -bool blk_is_sg(BlockBackend *blk); -bool blk_enable_write_cache(BlockBackend *blk); -void blk_set_enable_write_cache(BlockBackend *blk, bool wce); -void blk_invalidate_cache(BlockBackend *blk, Error **errp); -bool blk_is_inserted(BlockBackend *blk); -bool blk_is_available(BlockBackend *blk); -void blk_lock_medium(BlockBackend *blk, bool locked); -void blk_eject(BlockBackend *blk, bool eject_flag); -int blk_get_flags(BlockBackend *blk); -uint32_t blk_get_request_alignment(BlockBackend *blk); -uint32_t blk_get_max_transfer(BlockBackend *blk); -uint64_t blk_get_max_hw_transfer(BlockBackend *blk); -int blk_get_max_iov(BlockBackend *blk); -int blk_get_max_hw_iov(BlockBackend *blk); -void blk_set_guest_block_size(BlockBackend *blk, int align); -void *blk_try_blockalign(BlockBackend *blk, size_t size); -void *blk_blockalign(BlockBackend *blk, size_t size); -bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp); -void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason); -void blk_op_block_all(BlockBackend *blk, Error *reason); -void blk_op_unblock_all(BlockBackend *blk, Error *reason); -AioContext *blk_get_aio_context(BlockBackend *blk); -int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, - Error **errp); -void blk_add_aio_context_notifier(BlockBackend *blk, - void (*attached_aio_context)(AioContext *new_context, void *opaque), - void (*detach_aio_context)(void *opaque), void *opaque); -void blk_remove_aio_context_notifier(BlockBackend *blk, - void (*attached_aio_context)(AioContext *, - void *), - void (*detach_aio_context)(void *), - void *opaque); -void 
blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify); -void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify); -void blk_io_plug(BlockBackend *blk); -void blk_io_unplug(BlockBackend *blk); -BlockAcctStats *blk_get_stats(BlockBackend *blk); -BlockBackendRootState *blk_get_root_state(BlockBackend *blk); -void blk_update_root_state(BlockBackend *blk); -bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk); -int blk_get_open_flags_from_root_state(BlockBackend *blk); - -void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, - BlockCompletionFunc *cb, void *opaque); -int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int64_t bytes, BdrvRequestFlags flags); -int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, - int64_t bytes); -int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, - PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); -int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); -int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, - int64_t pos, int size); -int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); -int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz); -int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo); -BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, - BlockCompletionFunc *cb, - void *opaque, int ret); - -void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg); -void blk_io_limits_disable(BlockBackend *blk); -void blk_io_limits_enable(BlockBackend *blk, const char *group); -void blk_io_limits_update_group(BlockBackend *blk, const char *group); -void blk_set_force_allow_inactivate(BlockBackend *blk); - -void blk_register_buf(BlockBackend *blk, void *host, size_t size); -void blk_unregister_buf(BlockBackend *blk, void *host); - -int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, - BlockBackend *blk_out, int64_t off_out, - int64_t bytes, 
BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); - -const BdrvChild *blk_root(BlockBackend *blk); - -int blk_make_empty(BlockBackend *blk, Error **errp); +/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */ #endif diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h index f9fb54d437..3211b16513 100644 --- a/include/sysemu/blockdev.h +++ b/include/sysemu/blockdev.h @@ -13,9 +13,6 @@ #include "block/block.h" #include "qemu/queue.h" -void blockdev_mark_auto_del(BlockBackend *blk); -void blockdev_auto_del(BlockBackend *blk); - typedef enum { IF_DEFAULT = -1, /* for use with drive_add() only */ /* @@ -38,6 +35,16 @@ struct DriveInfo { QTAILQ_ENTRY(DriveInfo) next; }; +/* + * Global state (GS) API. These functions run under the BQL. + * + * See include/block/block-global-state.h for more information about + * the GS API. + */ + +void blockdev_mark_auto_del(BlockBackend *blk); +void blockdev_auto_del(BlockBackend *blk); + DriveInfo *blk_legacy_dinfo(BlockBackend *blk); DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo); BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo); diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h index 247f0661d1..bf8f99a824 100644 --- a/include/sysemu/hax.h +++ b/include/sysemu/hax.h @@ -25,17 +25,23 @@ int hax_sync_vcpus(void); #ifdef NEED_CPU_H +# ifdef CONFIG_HAX +# define CONFIG_HAX_IS_POSSIBLE +# endif +#else /* !NEED_CPU_H */ +# define CONFIG_HAX_IS_POSSIBLE +#endif -#ifdef CONFIG_HAX +#ifdef CONFIG_HAX_IS_POSSIBLE -int hax_enabled(void); +extern bool hax_allowed; -#else /* CONFIG_HAX */ +#define hax_enabled() (hax_allowed) -#define hax_enabled() (0) +#else /* !CONFIG_HAX_IS_POSSIBLE */ -#endif /* CONFIG_HAX */ +#define hax_enabled() (0) -#endif /* NEED_CPU_H */ +#endif /* CONFIG_HAX_IS_POSSIBLE */ #endif /* QEMU_HAX_H */ diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h index 01b5ebf442..22903a55f7 100644 --- a/include/sysemu/hw_accel.h +++ 
b/include/sysemu/hw_accel.h @@ -23,9 +23,4 @@ void cpu_synchronize_post_reset(CPUState *cpu); void cpu_synchronize_post_init(CPUState *cpu); void cpu_synchronize_pre_loadvm(CPUState *cpu); -static inline bool cpu_check_are_resettable(void) -{ - return kvm_enabled() ? kvm_cpu_check_are_resettable() : true; -} - #endif /* QEMU_HW_ACCEL_H */ diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 6eb39a088b..a5bec96fb0 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -249,6 +249,9 @@ int kvm_has_intx_set_mask(void); bool kvm_arm_supports_user_irq(void); +int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); +int kvm_on_sigbus(int code, void *addr); + #ifdef NEED_CPU_H #include "cpu.h" @@ -261,9 +264,6 @@ int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr, void kvm_remove_all_breakpoints(CPUState *cpu); int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap); -int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); -int kvm_on_sigbus(int code, void *addr); - /* internal API */ int kvm_ioctl(KVMState *s, int type, ...); diff --git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h index 4b20f1a639..3bbeb1bcb4 100644 --- a/include/sysemu/memory_mapping.h +++ b/include/sysemu/memory_mapping.h @@ -15,8 +15,7 @@ #define MEMORY_MAPPING_H #include "qemu/queue.h" -#include "exec/cpu-defs.h" -#include "exec/memory.h" +#include "exec/cpu-common.h" typedef struct GuestPhysBlock { /* visible to guest, reflects PCI hole, etc */ @@ -43,7 +42,7 @@ typedef struct GuestPhysBlockList { /* The physical and virtual address in the memory mapping are contiguous. 
*/ typedef struct MemoryMapping { hwaddr phys_addr; - target_ulong virt_addr; + vaddr virt_addr; ram_addr_t length; QTAILQ_ENTRY(MemoryMapping) next; } MemoryMapping; diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 2edf33658a..dd64fb401d 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -55,6 +55,7 @@ int os_mlock(void); typedef struct timeval qemu_timeval; #define qemu_gettimeofday(tp) gettimeofday(tp, NULL) +int os_set_daemonize(bool d); bool is_daemonized(void); /** diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index 43f569b5c2..770752222a 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -77,6 +77,14 @@ typedef struct { } qemu_timeval; int qemu_gettimeofday(qemu_timeval *tp); +static inline int os_set_daemonize(bool d) +{ + if (d) { + return -ENOTSUP; + } + return 0; +} + static inline bool is_daemonized(void) { return false; diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 675873e200..dd444734d9 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -245,6 +245,9 @@ DEF(or_vec, 1, 2, 0, IMPLVEC) DEF(xor_vec, 1, 2, 0, IMPLVEC) DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec)) DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec)) +DEF(nand_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nand_vec)) +DEF(nor_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nor_vec)) +DEF(eqv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_eqv_vec)) DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec)) DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec)) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 42f5b500ed..73869fd9d0 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -43,7 +43,7 @@ #else #define MAX_OPC_PARAM_PER_ARG 1 #endif -#define MAX_OPC_PARAM_IARGS 6 +#define MAX_OPC_PARAM_IARGS 7 #define MAX_OPC_PARAM_OARGS 1 #define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS) @@ -183,6 +183,9 @@ typedef 
uint64_t TCGRegSet; #define TCG_TARGET_HAS_not_vec 0 #define TCG_TARGET_HAS_andc_vec 0 #define TCG_TARGET_HAS_orc_vec 0 +#define TCG_TARGET_HAS_nand_vec 0 +#define TCG_TARGET_HAS_nor_vec 0 +#define TCG_TARGET_HAS_eqv_vec 0 #define TCG_TARGET_HAS_roti_vec 0 #define TCG_TARGET_HAS_rots_vec 0 #define TCG_TARGET_HAS_rotv_vec 0 @@ -381,6 +381,8 @@ void job_ref(Job *job) void job_unref(Job *job) { + GLOBAL_STATE_CODE(); + if (--job->refcnt == 0) { assert(job->status == JOB_STATUS_NULL); assert(!timer_pending(&job->sleep_timer)); @@ -602,6 +604,7 @@ bool job_user_paused(Job *job) void job_user_resume(Job *job, Error **errp) { assert(job); + GLOBAL_STATE_CODE(); if (!job->user_paused || job->pause_count <= 0) { error_setg(errp, "Can't resume a job that was not paused"); return; @@ -672,6 +675,7 @@ static void job_update_rc(Job *job) static void job_commit(Job *job) { assert(!job->ret); + GLOBAL_STATE_CODE(); if (job->driver->commit) { job->driver->commit(job); } @@ -680,6 +684,7 @@ static void job_commit(Job *job) static void job_abort(Job *job) { assert(job->ret); + GLOBAL_STATE_CODE(); if (job->driver->abort) { job->driver->abort(job); } @@ -687,6 +692,7 @@ static void job_abort(Job *job) static void job_clean(Job *job) { + GLOBAL_STATE_CODE(); if (job->driver->clean) { job->driver->clean(job); } @@ -726,6 +732,7 @@ static int job_finalize_single(Job *job) static void job_cancel_async(Job *job, bool force) { + GLOBAL_STATE_CODE(); if (job->driver->cancel) { force = job->driver->cancel(job, force); } else { @@ -825,6 +832,7 @@ static void job_completed_txn_abort(Job *job) static int job_prepare(Job *job) { + GLOBAL_STATE_CODE(); if (job->ret == 0 && job->driver->prepare) { job->ret = job->driver->prepare(job); job_update_rc(job); @@ -952,6 +960,7 @@ static void coroutine_fn job_co_entry(void *opaque) Job *job = opaque; assert(job && job->driver && job->driver->run); + assert(job->aio_context == qemu_get_current_aio_context()); job_pause_point(job); job->ret = 
job->driver->run(job, &job->err); job->deferred_to_main_loop = true; @@ -1054,6 +1063,7 @@ void job_complete(Job *job, Error **errp) { /* Should not be reachable via external interface for internal jobs */ assert(job->id); + GLOBAL_STATE_CODE(); if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) { return; } diff --git a/meson.build b/meson.build index 28612fca36..2d6601467f 100644 --- a/meson.build +++ b/meson.build @@ -1462,14 +1462,16 @@ dbus_display = get_option('dbus_display') \ .allowed() have_virtfs = get_option('virtfs') \ - .require(targetos == 'linux', - error_message: 'virtio-9p (virtfs) requires Linux') \ - .require(libattr.found() and libcap_ng.found(), - error_message: 'virtio-9p (virtfs) requires libcap-ng-devel and libattr-devel') \ + .require(targetos == 'linux' or targetos == 'darwin', + error_message: 'virtio-9p (virtfs) requires Linux or macOS') \ + .require(targetos == 'linux' or cc.has_function('pthread_fchdir_np'), + error_message: 'virtio-9p (virtfs) on macOS requires the presence of pthread_fchdir_np') \ + .require(targetos == 'darwin' or (libattr.found() and libcap_ng.found()), + error_message: 'virtio-9p (virtfs) on Linux requires libcap-ng-devel and libattr-devel') \ .disable_auto_if(not have_tools and not have_system) \ .allowed() -have_virtfs_proxy_helper = have_virtfs and have_tools +have_virtfs_proxy_helper = targetos != 'darwin' and have_virtfs and have_tools foreach k : get_option('trace_backends') config_host_data.set('CONFIG_TRACE_' + k.to_upper(), true) @@ -1619,9 +1621,15 @@ config_host_data.set('CONFIG_CLOCK_ADJTIME', cc.has_function('clock_adjtime')) config_host_data.set('CONFIG_DUP3', cc.has_function('dup3')) config_host_data.set('CONFIG_FALLOCATE', cc.has_function('fallocate')) config_host_data.set('CONFIG_POSIX_FALLOCATE', cc.has_function('posix_fallocate')) -config_host_data.set('CONFIG_POSIX_MEMALIGN', cc.has_function('posix_memalign')) +# Note that we need to specify prefix: here to avoid incorrectly +# thinking that 
Windows has posix_memalign() +config_host_data.set('CONFIG_POSIX_MEMALIGN', cc.has_function('posix_memalign', prefix: '#include <stdlib.h>')) +config_host_data.set('CONFIG_ALIGNED_MALLOC', cc.has_function('_aligned_malloc')) +config_host_data.set('CONFIG_VALLOC', cc.has_function('valloc')) +config_host_data.set('CONFIG_MEMALIGN', cc.has_function('memalign')) config_host_data.set('CONFIG_PPOLL', cc.has_function('ppoll')) config_host_data.set('CONFIG_PREADV', cc.has_function('preadv', prefix: '#include <sys/uio.h>')) +config_host_data.set('CONFIG_PTHREAD_FCHDIR_NP', cc.has_function('pthread_fchdir_np')) config_host_data.set('CONFIG_SEM_TIMEDWAIT', cc.has_function('sem_timedwait', dependencies: threads)) config_host_data.set('CONFIG_SENDFILE', cc.has_function('sendfile')) config_host_data.set('CONFIG_SETNS', cc.has_function('setns') and cc.has_function('unshare')) @@ -2432,8 +2440,8 @@ if get_option('cfi') and slirp_opt == 'system' endif fdt = not_found -fdt_opt = get_option('fdt') if have_system + fdt_opt = get_option('fdt') if fdt_opt in ['enabled', 'auto', 'system'] have_internal = fs.exists(meson.current_source_dir() / 'dtc/libfdt/Makefile.libfdt') fdt = cc.find_library('fdt', kwargs: static_kwargs, @@ -2476,6 +2484,8 @@ if have_system fdt = declare_dependency(link_with: libfdt, include_directories: fdt_inc) endif +else + fdt_opt = 'disabled' endif if not fdt.found() and fdt_required.length() > 0 error('fdt not available but required by targets ' + ', '.join(fdt_required)) @@ -2705,6 +2715,7 @@ if have_system or have_user 'target/i386', 'target/i386/kvm', 'target/mips/tcg', + 'target/nios2', 'target/ppc', 'target/riscv', 'target/s390x', diff --git a/migration/block.c b/migration/block.c index a950977855..077a413325 100644 --- a/migration/block.c +++ b/migration/block.c @@ -932,7 +932,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) return -EINVAL; } - blk_invalidate_cache(blk, &local_err); + blk_activate(blk, &local_err); if (local_err) { 
error_report_err(local_err); return -EINVAL; diff --git a/migration/migration.c b/migration/migration.c index 9cc344514b..695f0f2900 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -503,9 +503,9 @@ static void process_incoming_migration_bh(void *opaque) if (!migrate_late_block_activate() || (autostart && (!global_state_received() || global_state_get_runstate() == RUN_STATE_RUNNING))) { - /* Make sure all file formats flush their mutable metadata. + /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); local_err = NULL; @@ -591,8 +591,8 @@ static void process_incoming_migration_co(void *opaque) /* we get COLO info, and know if we are in COLO mode */ if (!ret && migration_incoming_colo_enabled()) { - /* Make sure all file formats flush their mutable metadata */ - bdrv_invalidate_cache_all(&local_err); + /* Make sure all file formats throw away their mutable metadata */ + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); goto fail; @@ -1932,7 +1932,7 @@ static void migrate_fd_cancel(MigrationState *s) if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) { Error *local_err = NULL; - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); } else { @@ -3111,7 +3111,7 @@ fail: */ Error *local_err = NULL; - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); } @@ -3256,7 +3256,7 @@ fail_invalidate: Error *local_err = NULL; qemu_mutex_lock_iothread(); - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); } else { diff --git a/migration/savevm.c b/migration/savevm.c index 967ff80547..02ed94c180 100644 --- a/migration/savevm.c +++ 
b/migration/savevm.c @@ -1438,7 +1438,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, if (inactivate_disks) { /* Inactivate before sending QEMU_VM_EOF so that the - * bdrv_invalidate_cache_all() on the other end won't fail. */ + * bdrv_activate_all() on the other end won't fail. */ ret = bdrv_inactivate_all(); if (ret) { error_report("%s: bdrv_inactivate_all() failed (%d)", @@ -2013,9 +2013,9 @@ static void loadvm_postcopy_handle_run_bh(void *opaque) trace_loadvm_postcopy_handle_run_bh("after announce"); - /* Make sure all file formats flush their mutable metadata. + /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); local_err = NULL; @@ -2808,6 +2808,8 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, g_autoptr(GDateTime) now = g_date_time_new_now_local(); AioContext *aio_context; + GLOBAL_STATE_CODE(); + if (migration_is_blocked(errp)) { return false; } diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c index df97582dd4..ad82c275c4 100644 --- a/monitor/qmp-cmds.c +++ b/monitor/qmp-cmds.c @@ -144,7 +144,7 @@ void qmp_cont(Error **errp) * If there are no inactive block nodes (e.g. because the VM was just * paused rather than completing a migration), bdrv_inactivate_all() simply * doesn't do anything. 
*/ - bdrv_invalidate_cache_all(&local_err); + bdrv_activate_all(&local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/nbd/server.c b/nbd/server.c index 9fb2f26402..53e68cf027 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -25,6 +25,7 @@ #include "trace.h" #include "nbd-internal.h" #include "qemu/units.h" +#include "qemu/memalign.h" #define NBD_META_ID_BASE_ALLOCATION 0 #define NBD_META_ID_ALLOCATION_DEPTH 1 diff --git a/net/l2tpv3.c b/net/l2tpv3.c index e4d4218db6..b8faa8796c 100644 --- a/net/l2tpv3.c +++ b/net/l2tpv3.c @@ -34,7 +34,7 @@ #include "qemu/sockets.h" #include "qemu/iov.h" #include "qemu/main-loop.h" - +#include "qemu/memalign.h" /* The buffer size needs to be investigated for optimum numbers and * optimum means of paging in on different systems. This size is diff --git a/os-posix.c b/os-posix.c index ae6c9f2a5e..24692c8593 100644 --- a/os-posix.c +++ b/os-posix.c @@ -317,6 +317,12 @@ bool is_daemonized(void) return daemonize; } +int os_set_daemonize(bool d) +{ + daemonize = d; + return 0; +} + int os_mlock(void) { #ifdef HAVE_MLOCKALL diff --git a/pc-bios/bios-256k.bin b/pc-bios/bios-256k.bin Binary files differindex e0796344df..6163fb8149 100644 --- a/pc-bios/bios-256k.bin +++ b/pc-bios/bios-256k.bin diff --git a/pc-bios/bios-microvm.bin b/pc-bios/bios-microvm.bin Binary files differindex f0215521b0..97fbd3192a 100644 --- a/pc-bios/bios-microvm.bin +++ b/pc-bios/bios-microvm.bin diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin Binary files differindex bcf8b484c9..68f65ff2fd 100644 --- a/pc-bios/bios.bin +++ b/pc-bios/bios.bin diff --git a/pc-bios/vgabios-ati.bin b/pc-bios/vgabios-ati.bin Binary files differindex 7171a56f9d..4533d0d063 100644 --- a/pc-bios/vgabios-ati.bin +++ b/pc-bios/vgabios-ati.bin diff --git a/pc-bios/vgabios-bochs-display.bin b/pc-bios/vgabios-bochs-display.bin Binary files differindex afea4c930d..3ecf92de01 100644 --- a/pc-bios/vgabios-bochs-display.bin +++ b/pc-bios/vgabios-bochs-display.bin diff 
--git a/pc-bios/vgabios-cirrus.bin b/pc-bios/vgabios-cirrus.bin Binary files differindex 194c8139a7..9b4ffdf45f 100644 --- a/pc-bios/vgabios-cirrus.bin +++ b/pc-bios/vgabios-cirrus.bin diff --git a/pc-bios/vgabios-qxl.bin b/pc-bios/vgabios-qxl.bin Binary files differindex 056b6657b3..8a27dac557 100644 --- a/pc-bios/vgabios-qxl.bin +++ b/pc-bios/vgabios-qxl.bin diff --git a/pc-bios/vgabios-ramfb.bin b/pc-bios/vgabios-ramfb.bin Binary files differindex 02662006f2..ec9541cfb4 100644 --- a/pc-bios/vgabios-ramfb.bin +++ b/pc-bios/vgabios-ramfb.bin diff --git a/pc-bios/vgabios-stdvga.bin b/pc-bios/vgabios-stdvga.bin Binary files differindex cf81ce2876..55390c45c9 100644 --- a/pc-bios/vgabios-stdvga.bin +++ b/pc-bios/vgabios-stdvga.bin diff --git a/pc-bios/vgabios-virtio.bin b/pc-bios/vgabios-virtio.bin Binary files differindex f4178f70de..2334733a75 100644 --- a/pc-bios/vgabios-virtio.bin +++ b/pc-bios/vgabios-virtio.bin diff --git a/pc-bios/vgabios-vmware.bin b/pc-bios/vgabios-vmware.bin Binary files differindex 8fae88af28..b668ac04a6 100644 --- a/pc-bios/vgabios-vmware.bin +++ b/pc-bios/vgabios-vmware.bin diff --git a/pc-bios/vgabios.bin b/pc-bios/vgabios.bin Binary files differindex e5f45f0c9e..a924891ea5 100644 --- a/pc-bios/vgabios.bin +++ b/pc-bios/vgabios.bin diff --git a/plugins/loader.c b/plugins/loader.c index 4883b0a1cb..88c30bde2d 100644 --- a/plugins/loader.c +++ b/plugins/loader.c @@ -27,6 +27,7 @@ #include "qemu/cacheinfo.h" #include "qemu/xxhash.h" #include "qemu/plugin.h" +#include "qemu/memalign.h" #include "hw/core/cpu.h" #include "exec/exec-all.h" #ifndef CONFIG_USER_ONLY diff --git a/python/qemu/aqmp/legacy.py b/python/qemu/aqmp/legacy.py index 6baa5f3409..46026e9fdc 100644 --- a/python/qemu/aqmp/legacy.py +++ b/python/qemu/aqmp/legacy.py @@ -57,7 +57,7 @@ class QEMUMonitorProtocol(qemu.qmp.QEMUMonitorProtocol): self._timeout: Optional[float] = None if server: - self._aqmp._bind_hack(address) # pylint: disable=protected-access + 
self._sync(self._aqmp.start_server(self._address)) _T = TypeVar('_T') @@ -90,10 +90,7 @@ class QEMUMonitorProtocol(qemu.qmp.QEMUMonitorProtocol): self._aqmp.await_greeting = True self._aqmp.negotiate = True - self._sync( - self._aqmp.accept(self._address), - timeout - ) + self._sync(self._aqmp.accept(), timeout) ret = self._get_greeting() assert ret is not None diff --git a/python/qemu/aqmp/protocol.py b/python/qemu/aqmp/protocol.py index 33358f5cd7..36fae57f27 100644 --- a/python/qemu/aqmp/protocol.py +++ b/python/qemu/aqmp/protocol.py @@ -10,12 +10,14 @@ In this package, it is used as the implementation for the `QMPClient` class. """ +# It's all the docstrings ... ! It's long for a good reason ^_^; +# pylint: disable=too-many-lines + import asyncio from asyncio import StreamReader, StreamWriter from enum import Enum from functools import wraps import logging -import socket from ssl import SSLContext from typing import ( Any, @@ -239,8 +241,9 @@ class AsyncProtocol(Generic[T]): self._runstate = Runstate.IDLE self._runstate_changed: Optional[asyncio.Event] = None - # Workaround for bind() - self._sock: Optional[socket.socket] = None + # Server state for start_server() and _incoming() + self._server: Optional[asyncio.AbstractServer] = None + self._accepted: Optional[asyncio.Event] = None def __repr__(self) -> str: cls_name = type(self).__name__ @@ -265,21 +268,90 @@ class AsyncProtocol(Generic[T]): @upper_half @require(Runstate.IDLE) - async def accept(self, address: SocketAddrT, - ssl: Optional[SSLContext] = None) -> None: + async def start_server_and_accept( + self, address: SocketAddrT, + ssl: Optional[SSLContext] = None + ) -> None: """ Accept a connection and begin processing message queues. If this call fails, `runstate` is guaranteed to be set back to `IDLE`. + This method is precisely equivalent to calling `start_server()` + followed by `accept()`. + + :param address: + Address to listen on; UNIX socket path or TCP address/port. 
+ :param ssl: SSL context to use, if any. + + :raise StateError: When the `Runstate` is not `IDLE`. + :raise ConnectError: + When a connection or session cannot be established. + + This exception will wrap a more concrete one. In most cases, + the wrapped exception will be `OSError` or `EOFError`. If a + protocol-level failure occurs while establishing a new + session, the wrapped error may also be an `QMPError`. + """ + await self.start_server(address, ssl) + await self.accept() + assert self.runstate == Runstate.RUNNING + + @upper_half + @require(Runstate.IDLE) + async def start_server(self, address: SocketAddrT, + ssl: Optional[SSLContext] = None) -> None: + """ + Start listening for an incoming connection, but do not wait for a peer. + + This method starts listening for an incoming connection, but + does not block waiting for a peer. This call will return + immediately after binding and listening on a socket. A later + call to `accept()` must be made in order to finalize the + incoming connection. :param address: - Address to listen to; UNIX socket path or TCP address/port. + Address to listen on; UNIX socket path or TCP address/port. :param ssl: SSL context to use, if any. :raise StateError: When the `Runstate` is not `IDLE`. - :raise ConnectError: If a connection could not be accepted. + :raise ConnectError: + When the server could not start listening on this address. + + This exception will wrap a more concrete one. In most cases, + the wrapped exception will be `OSError`. + """ + await self._session_guard( + self._do_start_server(address, ssl), + 'Failed to establish connection') + assert self.runstate == Runstate.CONNECTING + + @upper_half + @require(Runstate.CONNECTING) + async def accept(self) -> None: + """ + Accept an incoming connection and begin processing message queues. + + If this call fails, `runstate` is guaranteed to be set back to `IDLE`. + + :raise StateError: When the `Runstate` is not `CONNECTING`. 
+ :raise QMPError: When `start_server()` was not called yet. + :raise ConnectError: + When a connection or session cannot be established. + + This exception will wrap a more concrete one. In most cases, + the wrapped exception will be `OSError` or `EOFError`. If a + protocol-level failure occurs while establishing a new + session, the wrapped error may also be an `QMPError`. """ - await self._new_session(address, ssl, accept=True) + if self._accepted is None: + raise QMPError("Cannot call accept() before start_server().") + await self._session_guard( + self._do_accept(), + 'Failed to establish connection') + await self._session_guard( + self._establish_session(), + 'Failed to establish session') + assert self.runstate == Runstate.RUNNING @upper_half @require(Runstate.IDLE) @@ -295,9 +367,21 @@ class AsyncProtocol(Generic[T]): :param ssl: SSL context to use, if any. :raise StateError: When the `Runstate` is not `IDLE`. - :raise ConnectError: If a connection cannot be made to the server. + :raise ConnectError: + When a connection or session cannot be established. + + This exception will wrap a more concrete one. In most cases, + the wrapped exception will be `OSError` or `EOFError`. If a + protocol-level failure occurs while establishing a new + session, the wrapped error may also be an `QMPError`. """ - await self._new_session(address, ssl) + await self._session_guard( + self._do_connect(address, ssl), + 'Failed to establish connection') + await self._session_guard( + self._establish_session(), + 'Failed to establish session') + assert self.runstate == Runstate.RUNNING @upper_half async def disconnect(self) -> None: @@ -317,153 +401,146 @@ class AsyncProtocol(Generic[T]): # Section: Session machinery # -------------------------- - @property - def _runstate_event(self) -> asyncio.Event: - # asyncio.Event() objects should not be created prior to entrance into - # an event loop, so we can ensure we create it in the correct context. 
- # Create it on-demand *only* at the behest of an 'async def' method. - if not self._runstate_changed: - self._runstate_changed = asyncio.Event() - return self._runstate_changed - - @upper_half - @bottom_half - def _set_state(self, state: Runstate) -> None: - """ - Change the `Runstate` of the protocol connection. - - Signals the `runstate_changed` event. - """ - if state == self._runstate: - return - - self.logger.debug("Transitioning from '%s' to '%s'.", - str(self._runstate), str(state)) - self._runstate = state - self._runstate_event.set() - self._runstate_event.clear() - - @upper_half - async def _new_session(self, - address: SocketAddrT, - ssl: Optional[SSLContext] = None, - accept: bool = False) -> None: + async def _session_guard(self, coro: Awaitable[None], emsg: str) -> None: """ - Establish a new connection and initialize the session. + Async guard function used to roll back to `IDLE` on any error. - Connect or accept a new connection, then begin the protocol - session machinery. If this call fails, `runstate` is guaranteed - to be set back to `IDLE`. + On any Exception, the state machine will be reset back to + `IDLE`. Most Exceptions will be wrapped with `ConnectError`, but + `BaseException` events will be left alone (This includes + asyncio.CancelledError, even prior to Python 3.8). - :param address: - Address to connect to/listen on; - UNIX socket path or TCP address/port. - :param ssl: SSL context to use, if any. - :param accept: Accept a connection instead of connecting when `True`. + :param error_message: + Human-readable string describing what connection phase failed. + :raise BaseException: + When `BaseException` occurs in the guarded block. :raise ConnectError: - When a connection or session cannot be established. - - This exception will wrap a more concrete one. In most cases, - the wrapped exception will be `OSError` or `EOFError`. 
If a - protocol-level failure occurs while establishing a new - session, the wrapped error may also be an `QMPError`. + When any other error is encountered in the guarded block. """ - assert self.runstate == Runstate.IDLE - + # Note: After Python 3.6 support is removed, this should be an + # @asynccontextmanager instead of accepting a callback. try: - phase = "connection" - await self._establish_connection(address, ssl, accept) - - phase = "session" - await self._establish_session() - + await coro except BaseException as err: - emsg = f"Failed to establish {phase}" self.logger.error("%s: %s", emsg, exception_summary(err)) self.logger.debug("%s:\n%s\n", emsg, pretty_traceback()) try: - # Reset from CONNECTING back to IDLE. + # Reset the runstate back to IDLE. await self.disconnect() except: - emsg = "Unexpected bottom half exception" + # We don't expect any Exceptions from the disconnect function + # here, because we failed to connect in the first place. + # The disconnect() function is intended to perform + # only cannot-fail cleanup here, but you never know. + emsg = ( + "Unexpected bottom half exception. " + "This is a bug in the QMP library. " + "Please report it to <qemu-devel@nongnu.org> and " + "CC: John Snow <jsnow@redhat.com>." + ) self.logger.critical("%s:\n%s\n", emsg, pretty_traceback()) raise + # CancelledError is an Exception with special semantic meaning; + # We do NOT want to wrap it up under ConnectError. # NB: CancelledError is not a BaseException before Python 3.8 if isinstance(err, asyncio.CancelledError): raise + # Any other kind of error can be treated as some kind of connection + # failure broadly. Inspect the 'exc' field to explore the root + # cause in greater detail. if isinstance(err, Exception): raise ConnectError(emsg, err) from err # Raise BaseExceptions un-wrapped, they're more important. 
raise - assert self.runstate == Runstate.RUNNING + @property + def _runstate_event(self) -> asyncio.Event: + # asyncio.Event() objects should not be created prior to entrance into + # an event loop, so we can ensure we create it in the correct context. + # Create it on-demand *only* at the behest of an 'async def' method. + if not self._runstate_changed: + self._runstate_changed = asyncio.Event() + return self._runstate_changed @upper_half - async def _establish_connection( - self, - address: SocketAddrT, - ssl: Optional[SSLContext] = None, - accept: bool = False - ) -> None: + @bottom_half + def _set_state(self, state: Runstate) -> None: """ - Establish a new connection. + Change the `Runstate` of the protocol connection. - :param address: - Address to connect to/listen on; - UNIX socket path or TCP address/port. - :param ssl: SSL context to use, if any. - :param accept: Accept a connection instead of connecting when `True`. + Signals the `runstate_changed` event. """ - assert self.runstate == Runstate.IDLE - self._set_state(Runstate.CONNECTING) - - # Allow runstate watchers to witness 'CONNECTING' state; some - # failures in the streaming layer are synchronous and will not - # otherwise yield. - await asyncio.sleep(0) + if state == self._runstate: + return - if accept: - await self._do_accept(address, ssl) - else: - await self._do_connect(address, ssl) + self.logger.debug("Transitioning from '%s' to '%s'.", + str(self._runstate), str(state)) + self._runstate = state + self._runstate_event.set() + self._runstate_event.clear() - def _bind_hack(self, address: Union[str, Tuple[str, int]]) -> None: + @bottom_half + async def _stop_server(self) -> None: + """ + Stop listening for / accepting new incoming connections. """ - Used to create a socket in advance of accept(). + if self._server is None: + return - This is a workaround to ensure that we can guarantee timing of - precisely when a socket exists to avoid a connection attempt - bouncing off of nothing. 
+ try: + self.logger.debug("Stopping server.") + self._server.close() + await self._server.wait_closed() + self.logger.debug("Server stopped.") + finally: + self._server = None - Python 3.7+ adds a feature to separate the server creation and - listening phases instead, and should be used instead of this - hack. + @bottom_half # However, it does not run from the R/W tasks. + async def _incoming(self, + reader: asyncio.StreamReader, + writer: asyncio.StreamWriter) -> None: """ - if isinstance(address, tuple): - family = socket.AF_INET - else: - family = socket.AF_UNIX + Accept an incoming connection and signal the upper_half. - sock = socket.socket(family, socket.SOCK_STREAM) - sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + This method does the minimum necessary to accept a single + incoming connection. It signals back to the upper_half ASAP so + that any errors during session initialization can occur + naturally in the caller's stack. - try: - sock.bind(address) - except: - sock.close() - raise + :param reader: Incoming `asyncio.StreamReader` + :param writer: Incoming `asyncio.StreamWriter` + """ + peer = writer.get_extra_info('peername', 'Unknown peer') + self.logger.debug("Incoming connection from %s", peer) + + if self._reader or self._writer: + # Sadly, we can have more than one pending connection + # because of https://bugs.python.org/issue46715 + # Close any extra connections we don't actually want. + self.logger.warning("Extraneous connection inadvertently accepted") + writer.close() + return - self._sock = sock + # A connection has been accepted; stop listening for new ones. 
+ assert self._accepted is not None + await self._stop_server() + self._reader, self._writer = (reader, writer) + self._accepted.set() @upper_half - async def _do_accept(self, address: SocketAddrT, - ssl: Optional[SSLContext] = None) -> None: + async def _do_start_server(self, address: SocketAddrT, + ssl: Optional[SSLContext] = None) -> None: """ - Acting as the transport server, accept a single connection. + Start listening for an incoming connection, but do not wait for a peer. + + This method starts listening for an incoming connection, but does not + block waiting for a peer. This call will return immediately after + binding and listening to a socket. A later call to accept() must be + made in order to finalize the incoming connection. :param address: Address to listen on; UNIX socket path or TCP address/port. @@ -471,52 +548,54 @@ class AsyncProtocol(Generic[T]): :raise OSError: For stream-related errors. """ - self.logger.debug("Awaiting connection on %s ...", address) - connected = asyncio.Event() - server: Optional[asyncio.AbstractServer] = None - - async def _client_connected_cb(reader: asyncio.StreamReader, - writer: asyncio.StreamWriter) -> None: - """Used to accept a single incoming connection, see below.""" - nonlocal server - nonlocal connected - - # A connection has been accepted; stop listening for new ones. - assert server is not None - server.close() - await server.wait_closed() - server = None - - # Register this client as being connected - self._reader, self._writer = (reader, writer) + assert self.runstate == Runstate.IDLE + self._set_state(Runstate.CONNECTING) - # Signal back: We've accepted a client! 
- connected.set() + self.logger.debug("Awaiting connection on %s ...", address) + self._accepted = asyncio.Event() if isinstance(address, tuple): coro = asyncio.start_server( - _client_connected_cb, - host=None if self._sock else address[0], - port=None if self._sock else address[1], + self._incoming, + host=address[0], + port=address[1], ssl=ssl, backlog=1, limit=self._limit, - sock=self._sock, ) else: coro = asyncio.start_unix_server( - _client_connected_cb, - path=None if self._sock else address, + self._incoming, + path=address, ssl=ssl, backlog=1, limit=self._limit, - sock=self._sock, ) - server = await coro # Starts listening - await connected.wait() # Waits for the callback to fire (and finish) - assert server is None - self._sock = None + # Allow runstate watchers to witness 'CONNECTING' state; some + # failures in the streaming layer are synchronous and will not + # otherwise yield. + await asyncio.sleep(0) + + # This will start the server (bind(2), listen(2)). It will also + # call accept(2) if we yield, but we don't block on that here. + self._server = await coro + self.logger.debug("Server listening on %s", address) + + @upper_half + async def _do_accept(self) -> None: + """ + Wait for and accept an incoming connection. + + Requires that we have not yet accepted an incoming connection + from the upper_half, but it's OK if the server is no longer + running because the bottom_half has already accepted the + connection. + """ + assert self._accepted is not None + await self._accepted.wait() + assert self._server is None + self._accepted = None self.logger.debug("Connection accepted.") @@ -532,6 +611,14 @@ class AsyncProtocol(Generic[T]): :raise OSError: For stream-related errors. """ + assert self.runstate == Runstate.IDLE + self._set_state(Runstate.CONNECTING) + + # Allow runstate watchers to witness 'CONNECTING' state; some + # failures in the streaming layer are synchronous and will not + # otherwise yield. 
+ await asyncio.sleep(0) + self.logger.debug("Connecting to %s ...", address) if isinstance(address, tuple): @@ -644,6 +731,7 @@ class AsyncProtocol(Generic[T]): self._reader = None self._writer = None + self._accepted = None # NB: _runstate_changed cannot be cleared because we still need it to # send the final runstate changed event ...! @@ -667,6 +755,9 @@ class AsyncProtocol(Generic[T]): def _done(task: Optional['asyncio.Future[Any]']) -> bool: return task is not None and task.done() + # If the server is running, stop it. + await self._stop_server() + # Are we already in an error pathway? If either of the tasks are # already done, or if we have no tasks but a reader/writer; we # must be. diff --git a/python/tests/protocol.py b/python/tests/protocol.py index 5cd7938be3..d6849ad306 100644 --- a/python/tests/protocol.py +++ b/python/tests/protocol.py @@ -41,12 +41,25 @@ class NullProtocol(AsyncProtocol[None]): self.trigger_input = asyncio.Event() await super()._establish_session() - async def _do_accept(self, address, ssl=None): - if not self.fake_session: - await super()._do_accept(address, ssl) + async def _do_start_server(self, address, ssl=None): + if self.fake_session: + self._accepted = asyncio.Event() + self._set_state(Runstate.CONNECTING) + await asyncio.sleep(0) + else: + await super()._do_start_server(address, ssl) + + async def _do_accept(self): + if self.fake_session: + self._accepted = None + else: + await super()._do_accept() async def _do_connect(self, address, ssl=None): - if not self.fake_session: + if self.fake_session: + self._set_state(Runstate.CONNECTING) + await asyncio.sleep(0) + else: await super()._do_connect(address, ssl) async def _do_recv(self) -> None: @@ -413,14 +426,14 @@ class Accept(Connect): assert family in ('INET', 'UNIX') if family == 'INET': - await self.proto.accept(('example.com', 1)) + await self.proto.start_server_and_accept(('example.com', 1)) elif family == 'UNIX': - await self.proto.accept('/dev/null') + await 
self.proto.start_server_and_accept('/dev/null') async def _hanging_connection(self): with TemporaryDirectory(suffix='.aqmp') as tmpdir: sock = os.path.join(tmpdir, type(self.proto).__name__ + ".sock") - await self.proto.accept(sock) + await self.proto.start_server_and_accept(sock) class FakeSession(TestBase): @@ -449,13 +462,13 @@ class FakeSession(TestBase): @TestBase.async_test async def testFakeAccept(self): """Test the full state lifecycle (via accept) with a no-op session.""" - await self.proto.accept('/not/a/real/path') + await self.proto.start_server_and_accept('/not/a/real/path') self.assertEqual(self.proto.runstate, Runstate.RUNNING) @TestBase.async_test async def testFakeRecv(self): """Test receiving a fake/null message.""" - await self.proto.accept('/not/a/real/path') + await self.proto.start_server_and_accept('/not/a/real/path') logname = self.proto.logger.name with self.assertLogs(logname, level='DEBUG') as context: @@ -471,7 +484,7 @@ class FakeSession(TestBase): @TestBase.async_test async def testFakeSend(self): """Test sending a fake/null message.""" - await self.proto.accept('/not/a/real/path') + await self.proto.start_server_and_accept('/not/a/real/path') logname = self.proto.logger.name with self.assertLogs(logname, level='DEBUG') as context: @@ -493,7 +506,7 @@ class FakeSession(TestBase): ): with self.assertRaises(StateError) as context: if accept: - await self.proto.accept('/not/a/real/path') + await self.proto.start_server_and_accept('/not/a/real/path') else: await self.proto.connect('/not/a/real/path') @@ -504,7 +517,7 @@ class FakeSession(TestBase): @TestBase.async_test async def testAcceptRequireRunning(self): """Test that accept() cannot be called when Runstate=RUNNING""" - await self.proto.accept('/not/a/real/path') + await self.proto.start_server_and_accept('/not/a/real/path') await self._prod_session_api( Runstate.RUNNING, @@ -515,7 +528,7 @@ class FakeSession(TestBase): @TestBase.async_test async def testConnectRequireRunning(self): 
"""Test that connect() cannot be called when Runstate=RUNNING""" - await self.proto.accept('/not/a/real/path') + await self.proto.start_server_and_accept('/not/a/real/path') await self._prod_session_api( Runstate.RUNNING, @@ -526,7 +539,7 @@ class FakeSession(TestBase): @TestBase.async_test async def testAcceptRequireDisconnecting(self): """Test that accept() cannot be called when Runstate=DISCONNECTING""" - await self.proto.accept('/not/a/real/path') + await self.proto.start_server_and_accept('/not/a/real/path') # Cheat: force a disconnect. await self.proto.simulate_disconnect() @@ -541,7 +554,7 @@ class FakeSession(TestBase): @TestBase.async_test async def testConnectRequireDisconnecting(self): """Test that connect() cannot be called when Runstate=DISCONNECTING""" - await self.proto.accept('/not/a/real/path') + await self.proto.start_server_and_accept('/not/a/real/path') # Cheat: force a disconnect. await self.proto.simulate_disconnect() @@ -576,7 +589,7 @@ class SimpleSession(TestBase): async def testSmoke(self): with TemporaryDirectory(suffix='.aqmp') as tmpdir: sock = os.path.join(tmpdir, type(self.proto).__name__ + ".sock") - server_task = create_task(self.server.accept(sock)) + server_task = create_task(self.server.start_server_and_accept(sock)) # give the server a chance to start listening [...] 
await asyncio.sleep(0) diff --git a/qapi/block-core.json b/qapi/block-core.json index 9a5a3641d0..f13b5ff942 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2914,13 +2914,14 @@ # @blkreplay: Since 4.2 # @compress: Since 5.0 # @copy-before-write: Since 6.2 +# @snapshot-access: Since 7.0 # # Since: 2.9 ## { 'enum': 'BlockdevDriver', 'data': [ 'blkdebug', 'blklogwrites', 'blkreplay', 'blkverify', 'bochs', 'cloop', 'compress', 'copy-before-write', 'copy-on-read', 'dmg', - 'file', 'ftp', 'ftps', 'gluster', + 'file', 'snapshot-access', 'ftp', 'ftps', 'gluster', {'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' }, {'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' }, 'http', 'https', 'iscsi', @@ -4171,11 +4172,19 @@ # # @target: The target for copy-before-write operations. # +# @bitmap: If specified, copy-before-write filter will do +# copy-before-write operations only for dirty regions of the +# bitmap. Bitmap size must be equal to length of file and +# target child of the filter. Note also, that bitmap is used +# only to initialize internal bitmap of the process, so further +# modifications (or removing) of specified bitmap doesn't +# influence the filter. 
(Since 7.0) +# # Since: 6.2 ## { 'struct': 'BlockdevOptionsCbw', 'base': 'BlockdevOptionsGenericFormat', - 'data': { 'target': 'BlockdevRef' } } + 'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap' } } ## # @BlockdevOptions: @@ -4259,6 +4268,7 @@ 'rbd': 'BlockdevOptionsRbd', 'replication': { 'type': 'BlockdevOptionsReplication', 'if': 'CONFIG_REPLICATION' }, + 'snapshot-access': 'BlockdevOptionsGenericFormat', 'ssh': 'BlockdevOptionsSsh', 'throttle': 'BlockdevOptionsThrottle', 'vdi': 'BlockdevOptionsGenericFormat', diff --git a/qemu-img.c b/qemu-img.c index 6fe2466032..5dffb3e616 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -42,6 +42,7 @@ #include "qemu/module.h" #include "qemu/sockets.h" #include "qemu/units.h" +#include "qemu/memalign.h" #include "qom/object_interfaces.h" #include "sysemu/block-backend.h" #include "block/block_int.h" diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 46593d632d..633b46cdb2 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -21,6 +21,7 @@ #include "qemu/option.h" #include "qemu/timer.h" #include "qemu/cutils.h" +#include "qemu/memalign.h" #define CMD_NOFILE_OK 0x01 diff --git a/qom/object.c b/qom/object.c index a27532a6ba..d34608558e 100644 --- a/qom/object.c +++ b/qom/object.c @@ -16,6 +16,7 @@ #include "qom/object.h" #include "qom/object_interfaces.h" #include "qemu/cutils.h" +#include "qemu/memalign.h" #include "qapi/visitor.h" #include "qapi/string-input-visitor.h" #include "qapi/string-output-visitor.h" diff --git a/roms/seabios b/roms/seabios -Subproject 6a62e0cb0dfe9cd28b70547dbea5caf76847c3a +Subproject d239552ce7220e448ae81f41515138f7b9e3c4d diff --git a/scripts/qmp/qmp-shell-wrap b/scripts/qmp/qmp-shell-wrap index 9e94da114f..66846e36d1 100755 --- a/scripts/qmp/qmp-shell-wrap +++ b/scripts/qmp/qmp-shell-wrap @@ -4,7 +4,7 @@ import os import sys sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python')) -from qemu.qmp import qmp_shell +from qemu.aqmp import qmp_shell if __name__ == 
'__main__': diff --git a/softmmu/arch_init.c b/softmmu/arch_init.c index 8919405c7b..79716f959b 100644 --- a/softmmu/arch_init.c +++ b/softmmu/arch_init.c @@ -22,6 +22,7 @@ * THE SOFTWARE. */ #include "qemu/osdep.h" +#include "qemu/module.h" #include "sysemu/arch_init.h" #ifdef TARGET_SPARC @@ -39,3 +40,11 @@ int graphic_depth = 32; #endif const uint32_t arch_type = QEMU_ARCH; + +void qemu_init_arch_modules(void) +{ +#ifdef CONFIG_MODULES + module_init_info(qemu_modinfo); + module_allow_arch(TARGET_NAME); +#endif +} diff --git a/softmmu/cpu-timers.c b/softmmu/cpu-timers.c index 34ddfa02f1..204d946a17 100644 --- a/softmmu/cpu-timers.c +++ b/softmmu/cpu-timers.c @@ -28,7 +28,6 @@ #include "migration/vmstate.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "exec/exec-all.h" #include "sysemu/cpus.h" #include "qemu/main-loop.h" #include "qemu/option.h" diff --git a/softmmu/cpus.c b/softmmu/cpus.c index 035395ae13..e1d84c8ccb 100644 --- a/softmmu/cpus.c +++ b/softmmu/cpus.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "monitor/monitor.h" +#include "qemu/coroutine-tls.h" #include "qapi/error.h" #include "qapi/qapi-commands-machine.h" #include "qapi/qapi-commands-misc.h" @@ -32,7 +33,7 @@ #include "qapi/qmp/qerror.h" #include "exec/gdbstub.h" #include "sysemu/hw_accel.h" -#include "exec/exec-all.h" +#include "exec/cpu-common.h" #include "qemu/thread.h" #include "qemu/plugin.h" #include "sysemu/cpus.h" @@ -66,6 +67,11 @@ static QemuMutex qemu_global_mutex; +/* + * The chosen accelerator is supposed to register this. 
+ */ +static const AccelOpsClass *cpus_accel; + bool cpu_is_stopped(CPUState *cpu) { return cpu->stopped || !runstate_is_running(); @@ -84,10 +90,12 @@ bool cpu_thread_is_idle(CPUState *cpu) if (cpu_is_stopped(cpu)) { return true; } - if (!cpu->halted || cpu_has_work(cpu) || - kvm_halt_in_kernel() || whpx_apic_in_platform()) { + if (!cpu->halted || cpu_has_work(cpu)) { return false; } + if (cpus_accel->cpu_thread_is_idle) { + return cpus_accel->cpu_thread_is_idle(cpu); + } return true; } @@ -121,11 +129,6 @@ void hw_error(const char *fmt, ...) abort(); } -/* - * The chosen accelerator is supposed to register this. - */ -static const AccelOpsClass *cpus_accel; - void cpu_synchronize_all_states(void) { CPUState *cpu; @@ -192,7 +195,10 @@ void cpu_synchronize_pre_loadvm(CPUState *cpu) bool cpus_are_resettable(void) { - return cpu_check_are_resettable(); + if (cpus_accel->cpus_are_resettable) { + return cpus_accel->cpus_are_resettable(); + } + return true; } int64_t cpus_get_virtual_clock(void) @@ -473,11 +479,16 @@ bool qemu_in_vcpu_thread(void) return current_cpu && qemu_cpu_is_self(current_cpu); } -static __thread bool iothread_locked = false; +QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked) bool qemu_mutex_iothread_locked(void) { - return iothread_locked; + return get_iothread_locked(); +} + +bool qemu_in_main_thread(void) +{ + return qemu_mutex_iothread_locked(); } /* @@ -490,13 +501,13 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line) g_assert(!qemu_mutex_iothread_locked()); bql_lock(&qemu_global_mutex, file, line); - iothread_locked = true; + set_iothread_locked(true); } void qemu_mutex_unlock_iothread(void) { g_assert(qemu_mutex_iothread_locked()); - iothread_locked = false; + set_iothread_locked(false); qemu_mutex_unlock(&qemu_global_mutex); } diff --git a/softmmu/globals.c b/softmmu/globals.c index 7d0fc81183..3ebd718e35 100644 --- a/softmmu/globals.c +++ b/softmmu/globals.c @@ -25,8 +25,6 @@ #include "qemu/osdep.h" #include 
"exec/cpu-common.h" #include "hw/display/vga.h" -#include "hw/i386/pc.h" -#include "hw/i386/x86.h" #include "hw/loader.h" #include "hw/xen/xen.h" #include "net/net.h" diff --git a/softmmu/memory_mapping.c b/softmmu/memory_mapping.c index a62eaa49cc..8320165ea2 100644 --- a/softmmu/memory_mapping.c +++ b/softmmu/memory_mapping.c @@ -17,6 +17,7 @@ #include "sysemu/memory_mapping.h" #include "exec/memory.h" #include "exec/address-spaces.h" +#include "hw/core/cpu.h" //#define DEBUG_GUEST_PHYS_REGION_ADD diff --git a/softmmu/meson.build b/softmmu/meson.build index 39f766ce7c..8138248661 100644 --- a/softmmu/meson.build +++ b/softmmu/meson.build @@ -1,20 +1,9 @@ specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: [files( 'arch_init.c', - 'balloon.c', - 'cpus.c', - 'cpu-throttle.c', - 'datadir.c', - 'globals.c', - 'physmem.c', 'ioport.c', - 'rtc.c', - 'runstate.c', 'memory.c', - 'memory_mapping.c', + 'physmem.c', 'qtest.c', - 'vl.c', - 'cpu-timers.c', - 'runstate-action.c', )]) specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: [files( @@ -22,9 +11,20 @@ specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: [files( )]) softmmu_ss.add(files( + 'balloon.c', 'bootdevice.c', + 'cpus.c', + 'cpu-throttle.c', + 'cpu-timers.c', + 'datadir.c', 'dma-helpers.c', + 'globals.c', + 'memory_mapping.c', 'qdev-monitor.c', + 'rtc.c', + 'runstate-action.c', + 'runstate.c', + 'vl.c', ), sdl, libpmem, libdaxctl) if have_tpm diff --git a/softmmu/physmem.c b/softmmu/physmem.c index a13289a594..43ae70fbe2 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -42,6 +42,7 @@ #include "qemu/config-file.h" #include "qemu/error-report.h" #include "qemu/qemu-print.h" +#include "qemu/memalign.h" #include "exec/memory.h" #include "exec/ioport.h" #include "sysemu/dma.h" @@ -61,7 +62,6 @@ #include "exec/memory-internal.h" #include "exec/ram_addr.h" -#include "exec/log.h" #include "qemu/pmem.h" @@ -3436,11 +3436,11 @@ address_space_write_cached_slow(MemoryRegionCache *cache, 
hwaddr addr, #include "memory_ldst.c.inc" /* virtual memory access for debug (includes writing to ROM) */ -int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, - void *ptr, target_ulong len, bool is_write) +int cpu_memory_rw_debug(CPUState *cpu, vaddr addr, + void *ptr, size_t len, bool is_write) { hwaddr phys_addr; - target_ulong l, page; + vaddr l, page; uint8_t *buf = ptr; cpu_synchronize_state(cpu); diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c index 01f3834db5..12fe60c467 100644 --- a/softmmu/qdev-monitor.c +++ b/softmmu/qdev-monitor.c @@ -83,6 +83,8 @@ static const QDevAlias qdev_alias_table[] = { { "virtio-gpu-device", "virtio-gpu", QEMU_ARCH_VIRTIO_MMIO }, { "virtio-gpu-ccw", "virtio-gpu", QEMU_ARCH_VIRTIO_CCW }, { "virtio-gpu-pci", "virtio-gpu", QEMU_ARCH_VIRTIO_PCI }, + { "virtio-gpu-gl-device", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_MMIO }, + { "virtio-gpu-gl-pci", "virtio-gpu-gl", QEMU_ARCH_VIRTIO_PCI }, { "virtio-input-host-device", "virtio-input-host", QEMU_ARCH_VIRTIO_MMIO }, { "virtio-input-host-ccw", "virtio-input-host", QEMU_ARCH_VIRTIO_CCW }, { "virtio-input-host-pci", "virtio-input-host", QEMU_ARCH_VIRTIO_PCI }, @@ -971,6 +973,8 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp) DeviceState *dev; BlockBackend *blk; + GLOBAL_STATE_CODE(); + dev = find_device_state(id, errp); if (dev == NULL) { return NULL; @@ -1034,6 +1038,13 @@ int qemu_global_option(const char *str) if (!opts) { return -1; } + if (!qemu_opt_get(opts, "driver") + || !qemu_opt_get(opts, "property") + || !qemu_opt_get(opts, "value")) { + error_report("options 'driver', 'property', and 'value'" + " are required"); + return -1; + } return 0; } diff --git a/softmmu/vl.c b/softmmu/vl.c index 1fe028800f..0b81f61535 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -2815,10 +2815,7 @@ void qemu_init(int argc, char **argv, char **envp) error_init(argv[0]); qemu_init_exec_dir(argv[0]); -#ifdef CONFIG_MODULES - module_init_info(qemu_modinfo); - 
module_allow_arch(TARGET_NAME); -#endif + qemu_init_arch_modules(); qemu_init_subsystems(); diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c index 504d33aa91..dd18b2cde8 100644 --- a/storage-daemon/qemu-storage-daemon.c +++ b/storage-daemon/qemu-storage-daemon.c @@ -93,6 +93,9 @@ static void help(void) " --chardev <options> configure a character device backend\n" " (see the qemu(1) man page for possible options)\n" "\n" +" --daemonize daemonize the process, and have the parent exit\n" +" once startup is complete\n" +"\n" " --export [type=]nbd,id=<id>,node-name=<node-name>[,name=<export-name>]\n" " [,writable=on|off][,bitmap=<name>]\n" " export the specified block node over NBD\n" @@ -144,6 +147,7 @@ QEMU_HELP_BOTTOM "\n", enum { OPTION_BLOCKDEV = 256, OPTION_CHARDEV, + OPTION_DAEMONIZE, OPTION_EXPORT, OPTION_MONITOR, OPTION_NBD_SERVER, @@ -177,13 +181,30 @@ static int getopt_set_loc(int argc, char **argv, const char *optstring, return c; } -static void process_options(int argc, char *argv[]) +/** + * Process QSD command-line arguments. + * + * This is done in two passes: + * + * First (@pre_init_pass is true), we do a pass where all global + * arguments pertaining to the QSD process (like --help or --daemonize) + * are processed. This pass is done before most of the QEMU-specific + * initialization steps (e.g. initializing the block layer or QMP), and + * so must only process arguments that are not really QEMU-specific. + * + * Second (@pre_init_pass is false), we (sequentially) process all + * QEMU/QSD-specific arguments. Many of these arguments are effectively + * translated to QMP commands (like --blockdev for blockdev-add, or + * --export for block-export-add). 
+ */ +static void process_options(int argc, char *argv[], bool pre_init_pass) { int c; static const struct option long_options[] = { {"blockdev", required_argument, NULL, OPTION_BLOCKDEV}, {"chardev", required_argument, NULL, OPTION_CHARDEV}, + {"daemonize", no_argument, NULL, OPTION_DAEMONIZE}, {"export", required_argument, NULL, OPTION_EXPORT}, {"help", no_argument, NULL, 'h'}, {"monitor", required_argument, NULL, OPTION_MONITOR}, @@ -196,11 +217,27 @@ static void process_options(int argc, char *argv[]) }; /* - * In contrast to the system emulator, options are processed in the order - * they are given on the command lines. This means that things must be - * defined first before they can be referenced in another option. + * In contrast to the system emulator, QEMU-specific options are processed + * in the order they are given on the command lines. This means that things + * must be defined first before they can be referenced in another option. */ + optind = 1; while ((c = getopt_set_loc(argc, argv, "-hT:V", long_options)) != -1) { + bool handle_option_pre_init; + + /* Should this argument be processed in the pre-init pass? */ + handle_option_pre_init = + c == '?' 
|| + c == 'h' || + c == 'V' || + c == OPTION_DAEMONIZE || + c == OPTION_PIDFILE; + + /* Process every option only in its respective pass */ + if (pre_init_pass != handle_option_pre_init) { + continue; + } + switch (c) { case '?': exit(EXIT_FAILURE); @@ -246,6 +283,12 @@ static void process_options(int argc, char *argv[]) qemu_opts_del(opts); break; } + case OPTION_DAEMONIZE: + if (os_set_daemonize(true) < 0) { + error_report("--daemonize not supported in this build"); + exit(EXIT_FAILURE); + } + break; case OPTION_EXPORT: { Visitor *v; @@ -334,6 +377,10 @@ int main(int argc, char *argv[]) qemu_init_exec_dir(argv[0]); os_setup_signal_handling(); + process_options(argc, argv, true); + + os_daemonize(); + module_call_init(MODULE_INIT_QOM); module_call_init(MODULE_INIT_TRACE); qemu_add_opts(&qemu_trace_opts); @@ -348,7 +395,7 @@ int main(int argc, char *argv[]) qemu_set_log(LOG_TRACE); qemu_init_main_loop(&error_fatal); - process_options(argc, argv); + process_options(argc, argv, false); /* * Write the pid file after creating chardevs, exports, and NBD servers but @@ -356,6 +403,7 @@ int main(int argc, char *argv[]) * it. 
*/ pid_file_init(); + os_setup_post(); while (!exit_requested) { main_loop_wait(false); diff --git a/stubs/iothread-lock-block.c b/stubs/iothread-lock-block.c new file mode 100644 index 0000000000..c88ed70462 --- /dev/null +++ b/stubs/iothread-lock-block.c @@ -0,0 +1,8 @@ +#include "qemu/osdep.h" +#include "qemu/main-loop.h" + +bool qemu_in_main_thread(void) +{ + return qemu_get_current_aio_context() == qemu_get_aio_context(); +} + diff --git a/stubs/meson.build b/stubs/meson.build index d359cbe1ad..6f80fec761 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -17,6 +17,9 @@ if linux_io_uring.found() stub_ss.add(files('io_uring.c')) endif stub_ss.add(files('iothread-lock.c')) +if have_block + stub_ss.add(files('iothread-lock-block.c')) +endif stub_ss.add(files('isa-bus.c')) stub_ss.add(files('is-daemonized.c')) if libaio.found() diff --git a/target/alpha/cpu-qom.h b/target/alpha/cpu-qom.h index 7bb9173c57..1f200724b6 100644 --- a/target/alpha/cpu-qom.h +++ b/target/alpha/cpu-qom.h @@ -25,8 +25,7 @@ #define TYPE_ALPHA_CPU "alpha-cpu" -OBJECT_DECLARE_TYPE(AlphaCPU, AlphaCPUClass, - ALPHA_CPU) +OBJECT_DECLARE_CPU_TYPE(AlphaCPU, AlphaCPUClass, ALPHA_CPU) /** * AlphaCPUClass: diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h index e819211503..58f00b7814 100644 --- a/target/alpha/cpu.h +++ b/target/alpha/cpu.h @@ -197,9 +197,7 @@ enum { #define MMU_USER_IDX 1 #define MMU_PHYS_IDX 2 -typedef struct CPUAlphaState CPUAlphaState; - -struct CPUAlphaState { +typedef struct CPUArchState { uint64_t ir[31]; float64 fir[31]; uint64_t pc; @@ -251,7 +249,7 @@ struct CPUAlphaState { uint32_t features; uint32_t amask; int implver; -}; +} CPUAlphaState; /** * AlphaCPU: @@ -259,7 +257,7 @@ struct CPUAlphaState { * * An Alpha CPU. 
*/ -struct AlphaCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -285,9 +283,6 @@ int alpha_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); #define cpu_list alpha_cpu_list -typedef CPUAlphaState CPUArchState; -typedef AlphaCPU ArchCPU; - #include "exec/cpu-all.h" enum { diff --git a/target/alpha/translate.c b/target/alpha/translate.c index ca78a0faed..66768ab47a 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "cpu.h" #include "sysemu/cpus.h" -#include "sysemu/cpu-timers.h" #include "disas/disas.h" #include "qemu/host-utils.h" #include "exec/exec-all.h" diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h index a22bd506d0..64c44cef2d 100644 --- a/target/arm/cpu-qom.h +++ b/target/arm/cpu-qom.h @@ -27,8 +27,7 @@ struct arm_boot_info; #define TYPE_ARM_CPU "arm-cpu" -OBJECT_DECLARE_TYPE(ARMCPU, ARMCPUClass, - ARM_CPU) +OBJECT_DECLARE_CPU_TYPE(ARMCPU, ARMCPUClass, ARM_CPU) #define TYPE_ARM_MAX_CPU "max-" TYPE_ARM_CPU diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 7091684a16..185d4e774d 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1392,6 +1392,12 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp) error_propagate(errp, local_err); return; } + + arm_cpu_lpa2_finalize(cpu, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } } if (kvm_enabled()) { diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 24d9fff170..157f214cce 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -204,10 +204,12 @@ typedef struct { # define ARM_MAX_VQ 16 void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp); void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp); +void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp); #else # define ARM_MAX_VQ 1 static inline void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) { } static inline void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) { } +static inline void 
arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp) { } #endif typedef struct ARMVectorReg { @@ -232,7 +234,7 @@ typedef struct CPUARMTBFlags { target_ulong flags2; } CPUARMTBFlags; -typedef struct CPUARMState { +typedef struct CPUArchState { /* Regs for current mode. */ uint32_t regs[16]; @@ -774,7 +776,7 @@ typedef struct ARMISARegisters ARMISARegisters; * * An ARM CPU core. */ -struct ARMCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -975,10 +977,11 @@ struct ARMCPU { /* * Intermediate values used during property parsing. - * Once finalized, the values should be read from ID_AA64ISAR1. + * Once finalized, the values should be read from ID_AA64*. */ bool prop_pauth; bool prop_pauth_impdef; + bool prop_lpa2; /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */ uint32_t dcz_blocksize; @@ -3410,9 +3413,6 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env) } } -typedef CPUARMState CPUArchState; -typedef ARMCPU ArchCPU; - #include "exec/cpu-all.h" /* diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 2fdc16bf18..eb44c05822 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -688,6 +688,29 @@ void aarch64_add_pauth_properties(Object *obj) } } +static Property arm_cpu_lpa2_property = + DEFINE_PROP_BOOL("lpa2", ARMCPU, prop_lpa2, true); + +void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp) +{ + uint64_t t; + + /* + * We only install the property for tcg -cpu max; this is the + * only situation in which the cpu field can be true. 
+ */ + if (!cpu->prop_lpa2) { + return; + } + + t = cpu->isar.id_aa64mmfr0; + t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 2); /* 16k pages w/ LPA2 */ + t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4, 1); /* 4k pages w/ LPA2 */ + t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 3); /* 16k stage2 w/ LPA2 */ + t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 3); /* 4k stage2 w/ LPA2 */ + cpu->isar.id_aa64mmfr0 = t; +} + static void aarch64_host_initfn(Object *obj) { #if defined(CONFIG_KVM) @@ -897,6 +920,7 @@ static void aarch64_max_initfn(Object *obj) aarch64_add_sve_properties(obj); object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_max_vq, cpu_max_set_sve_max_vq, NULL, NULL); + qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); } static void aarch64_a64fx_initfn(Object *obj) diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h index ea238cff83..9a9d1a0bf5 100644 --- a/target/arm/hvf_arm.h +++ b/target/arm/hvf_arm.h @@ -13,6 +13,6 @@ #include "cpu.h" -void hvf_arm_set_cpu_features_from_host(struct ARMCPU *cpu); +void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu); #endif diff --git a/target/arm/translate-neon.c b/target/arm/translate-neon.c index 3854dd3516..384604c009 100644 --- a/target/arm/translate-neon.c +++ b/target/arm/translate-neon.c @@ -657,21 +657,24 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) /* Catch the UNDEF cases. This is unavoidably a bit messy. 
*/ switch (nregs) { case 1: + if (a->stride != 1) { + return false; + } if (((a->align & (1 << a->size)) != 0) || (a->size == 2 && (a->align == 1 || a->align == 2))) { return false; } break; - case 3: - if ((a->align & 1) != 0) { - return false; - } - /* fall through */ case 2: if (a->size == 2 && (a->align & 2) != 0) { return false; } break; + case 3: + if (a->align != 0) { + return false; + } + break; case 4: if (a->size == 2 && a->align == 3) { return false; diff --git a/target/avr/cpu-qom.h b/target/avr/cpu-qom.h index 14e5b3ce72..32a1c762e6 100644 --- a/target/avr/cpu-qom.h +++ b/target/avr/cpu-qom.h @@ -26,8 +26,7 @@ #define TYPE_AVR_CPU "avr-cpu" -OBJECT_DECLARE_TYPE(AVRCPU, AVRCPUClass, - AVR_CPU) +OBJECT_DECLARE_CPU_TYPE(AVRCPU, AVRCPUClass, AVR_CPU) /** * AVRCPUClass: diff --git a/target/avr/cpu.h b/target/avr/cpu.h index dceacf3cd7..55497f851d 100644 --- a/target/avr/cpu.h +++ b/target/avr/cpu.h @@ -108,9 +108,7 @@ typedef enum AVRFeature { AVR_FEATURE_RAMPZ, } AVRFeature; -typedef struct CPUAVRState CPUAVRState; - -struct CPUAVRState { +typedef struct CPUArchState { uint32_t pc_w; /* 0x003fffff up to 22 bits */ uint32_t sregC; /* 0x00000001 1 bit */ @@ -137,7 +135,7 @@ struct CPUAVRState { bool fullacc; /* CPU/MEM if true MEM only otherwise */ uint64_t features; -}; +} CPUAVRState; /** * AVRCPU: @@ -145,14 +143,14 @@ struct CPUAVRState { * * A AVR CPU. 
*/ -typedef struct AVRCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ CPUNegativeOffsetState neg; CPUAVRState env; -} AVRCPU; +}; extern const struct VMStateDescription vms_avr_cpu; @@ -247,9 +245,6 @@ bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr); -typedef CPUAVRState CPUArchState; -typedef AVRCPU ArchCPU; - #include "exec/cpu-all.h" #endif /* !defined (QEMU_AVR_CPU_H) */ diff --git a/target/cris/cpu-qom.h b/target/cris/cpu-qom.h index 2596edc7e3..71e8af0e70 100644 --- a/target/cris/cpu-qom.h +++ b/target/cris/cpu-qom.h @@ -25,8 +25,7 @@ #define TYPE_CRIS_CPU "cris-cpu" -OBJECT_DECLARE_TYPE(CRISCPU, CRISCPUClass, - CRIS_CPU) +OBJECT_DECLARE_CPU_TYPE(CRISCPU, CRISCPUClass, CRIS_CPU) /** * CRISCPUClass: diff --git a/target/cris/cpu.h b/target/cris/cpu.h index b445b194ea..e6776f25b1 100644 --- a/target/cris/cpu.h +++ b/target/cris/cpu.h @@ -105,7 +105,7 @@ typedef struct { uint32_t lo; } TLBSet; -typedef struct CPUCRISState { +typedef struct CPUArchState { uint32_t regs[16]; /* P0 - P15 are referred to as special registers in the docs. */ uint32_t pregs[16]; @@ -173,7 +173,7 @@ typedef struct CPUCRISState { * * A CRIS CPU. */ -struct CRISCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -265,9 +265,6 @@ static inline int cpu_mmu_index (CPUCRISState *env, bool ifetch) #define SFR_RW_MM_TLB_LO env->pregs[PR_SRS]][5 #define SFR_RW_MM_TLB_HI env->pregs[PR_SRS]][6 -typedef CPUCRISState CPUArchState; -typedef CRISCPU ArchCPU; - #include "exec/cpu-all.h" static inline void cpu_get_tb_cpu_state(CPUCRISState *env, target_ulong *pc, diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 58a0d3870b..2a65a57bab 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. 
All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,14 +18,13 @@ #ifndef HEXAGON_CPU_H #define HEXAGON_CPU_H -/* Forward declaration needed by some of the header files */ -typedef struct CPUHexagonState CPUHexagonState; - #include "fpu/softfloat-types.h" #include "exec/cpu-defs.h" #include "hex_regs.h" #include "mmvec/mmvec.h" +#include "qom/object.h" +#include "hw/core/cpu.h" #define NUM_PREGS 4 #define TOTAL_PER_THREAD_REGS 64 @@ -75,7 +74,7 @@ typedef struct { /* Maximum number of vector temps in a packet */ #define VECTOR_TEMPS_MAX 4 -struct CPUHexagonState { +typedef struct CPUArchState { target_ulong gpr[TOTAL_PER_THREAD_REGS]; target_ulong pred[NUM_PREGS]; target_ulong branch_taken; @@ -129,14 +128,9 @@ struct CPUHexagonState { target_ulong vstore_pending[VSTORES_MAX]; bool vtcm_pending; VTCMStoreLog vtcm_log; -}; +} CPUHexagonState; -#define HEXAGON_CPU_CLASS(klass) \ - OBJECT_CLASS_CHECK(HexagonCPUClass, (klass), TYPE_HEXAGON_CPU) -#define HEXAGON_CPU(obj) \ - OBJECT_CHECK(HexagonCPU, (obj), TYPE_HEXAGON_CPU) -#define HEXAGON_CPU_GET_CLASS(obj) \ - OBJECT_GET_CLASS(HexagonCPUClass, (obj), TYPE_HEXAGON_CPU) +OBJECT_DECLARE_CPU_TYPE(HexagonCPU, HexagonCPUClass, HEXAGON_CPU) typedef struct HexagonCPUClass { /*< private >*/ @@ -146,7 +140,7 @@ typedef struct HexagonCPUClass { DeviceReset parent_reset; } HexagonCPUClass; -typedef struct HexagonCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -155,7 +149,7 @@ typedef struct HexagonCPU { bool lldb_compat; target_ulong lldb_stack_adjust; -} HexagonCPU; +}; #include "cpu_bits.h" @@ -180,7 +174,6 @@ static inline int cpu_mmu_index(CPUHexagonState *env, bool ifetch) #endif } -typedef struct CPUHexagonState CPUArchState; typedef HexagonCPU ArchCPU; void hexagon_translate_init(void); diff --git a/target/hppa/cpu-qom.h b/target/hppa/cpu-qom.h index d424f88370..b96e0318c7 
100644 --- a/target/hppa/cpu-qom.h +++ b/target/hppa/cpu-qom.h @@ -25,8 +25,7 @@ #define TYPE_HPPA_CPU "hppa-cpu" -OBJECT_DECLARE_TYPE(HPPACPU, HPPACPUClass, - HPPA_CPU) +OBJECT_DECLARE_CPU_TYPE(HPPACPU, HPPACPUClass, HPPA_CPU) /** * HPPACPUClass: diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h index 93c119532a..4cc936b6bf 100644 --- a/target/hppa/cpu.h +++ b/target/hppa/cpu.h @@ -138,8 +138,6 @@ #define CR_IPSW 22 #define CR_EIRR 23 -typedef struct CPUHPPAState CPUHPPAState; - #if TARGET_REGISTER_BITS == 32 typedef uint32_t target_ureg; typedef int32_t target_sreg; @@ -168,7 +166,7 @@ typedef struct { unsigned access_id : 16; } hppa_tlb_entry; -struct CPUHPPAState { +typedef struct CPUArchState { target_ureg gr[32]; uint64_t fr[32]; uint64_t sr[8]; /* stored shifted into place for gva */ @@ -207,7 +205,7 @@ struct CPUHPPAState { /* ??? We should use a more intelligent data structure. */ hppa_tlb_entry tlb[HPPA_TLB_ENTRIES]; uint32_t tlb_last; -}; +} CPUHPPAState; /** * HPPACPU: @@ -215,7 +213,7 @@ struct CPUHPPAState { * * An HPPA CPU. 
*/ -struct HPPACPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -225,10 +223,6 @@ struct HPPACPU { QEMUTimer *alarm_timer; }; - -typedef CPUHPPAState CPUArchState; -typedef HPPACPU ArchCPU; - #include "exec/cpu-all.h" static inline int cpu_mmu_index(CPUHPPAState *env, bool ifetch) diff --git a/target/i386/cpu-qom.h b/target/i386/cpu-qom.h index f9923cee04..c557a522e1 100644 --- a/target/i386/cpu-qom.h +++ b/target/i386/cpu-qom.h @@ -30,8 +30,7 @@ #define TYPE_X86_CPU "i386-cpu" #endif -OBJECT_DECLARE_TYPE(X86CPU, X86CPUClass, - X86_CPU) +OBJECT_DECLARE_CPU_TYPE(X86CPU, X86CPUClass, X86_CPU) typedef struct X86CPUModel X86CPUModel; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index e69ab5dd78..e11734ba86 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1431,7 +1431,7 @@ typedef struct HVFX86LazyFlags { target_ulong auxbits; } HVFX86LazyFlags; -typedef struct CPUX86State { +typedef struct CPUArchState { /* standard registers */ target_ulong regs[CPU_NB_REGS]; target_ulong eip; @@ -1707,7 +1707,7 @@ struct kvm_msrs; * * An x86 CPU. 
*/ -struct X86CPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -2074,9 +2074,6 @@ static inline int cpu_mmu_index_kernel(CPUX86State *env) #define CC_SRC2 (env->cc_src2) #define CC_OP (env->cc_op) -typedef CPUX86State CPUArchState; -typedef X86CPU ArchCPU; - #include "exec/cpu-all.h" #include "svm.h" diff --git a/target/i386/hax/hax-all.c b/target/i386/hax/hax-all.c index bf65ed6fa9..81f665e212 100644 --- a/target/i386/hax/hax-all.c +++ b/target/i386/hax/hax-all.c @@ -49,18 +49,13 @@ const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */ /* Minimum HAX kernel version */ const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */ -static bool hax_allowed; +bool hax_allowed; struct hax_state hax_global; static void hax_vcpu_sync_state(CPUArchState *env, int modified); static int hax_arch_get_registers(CPUArchState *env); -int hax_enabled(void) -{ - return hax_allowed; -} - int valid_hax_tunnel_size(uint16_t size) { return size >= sizeof(struct hax_tunnel); @@ -227,7 +222,7 @@ int hax_init_vcpu(CPUState *cpu) cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index]; cpu->vcpu_dirty = true; - qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr)); + qemu_register_reset(hax_reset_vcpu_state, cpu->env_ptr); return ret; } @@ -674,7 +669,7 @@ void hax_cpu_synchronize_pre_loadvm(CPUState *cpu) int hax_smp_cpu_exec(CPUState *cpu) { - CPUArchState *env = (CPUArchState *) (cpu->env_ptr); + CPUArchState *env = cpu->env_ptr; int fatal; int ret; diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 4ba6e82fab..fc12c02fb2 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -49,6 +49,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/error-report.h" +#include "qemu/memalign.h" #include "sysemu/hvf.h" #include "sysemu/hvf_int.h" diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c index 7c8203b21f..050428795b 100644 --- a/target/i386/hvf/x86_emu.c 
+++ b/target/i386/hvf/x86_emu.c @@ -171,12 +171,12 @@ void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size) } } -static bool is_host_reg(struct CPUX86State *env, target_ulong ptr) +static bool is_host_reg(CPUX86State *env, target_ulong ptr) { return (ptr - (target_ulong)&env->regs[0]) < sizeof(env->regs); } -void write_val_ext(struct CPUX86State *env, target_ulong ptr, target_ulong val, int size) +void write_val_ext(CPUX86State *env, target_ulong ptr, target_ulong val, int size) { if (is_host_reg(env, ptr)) { write_val_to_reg(ptr, val, size); @@ -185,14 +185,14 @@ void write_val_ext(struct CPUX86State *env, target_ulong ptr, target_ulong val, vmx_write_mem(env_cpu(env), ptr, &val, size); } -uint8_t *read_mmio(struct CPUX86State *env, target_ulong ptr, int bytes) +uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes) { vmx_read_mem(env_cpu(env), env->hvf_mmio_buf, ptr, bytes); return env->hvf_mmio_buf; } -target_ulong read_val_ext(struct CPUX86State *env, target_ulong ptr, int size) +target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size) { target_ulong val; uint8_t *mmio_ptr; @@ -222,7 +222,7 @@ target_ulong read_val_ext(struct CPUX86State *env, target_ulong ptr, int size) return val; } -static void fetch_operands(struct CPUX86State *env, struct x86_decode *decode, +static void fetch_operands(CPUX86State *env, struct x86_decode *decode, int n, bool val_op0, bool val_op1, bool val_op2) { int i; @@ -261,7 +261,7 @@ static void fetch_operands(struct CPUX86State *env, struct x86_decode *decode, } } -static void exec_mov(struct CPUX86State *env, struct x86_decode *decode) +static void exec_mov(CPUX86State *env, struct x86_decode *decode) { fetch_operands(env, decode, 2, false, true, false); write_val_ext(env, decode->op[0].ptr, decode->op[1].val, @@ -270,49 +270,49 @@ static void exec_mov(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static void exec_add(struct CPUX86State *env, struct 
x86_decode *decode) +static void exec_add(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, +, SET_FLAGS_OSZAPC_ADD, true); env->eip += decode->len; } -static void exec_or(struct CPUX86State *env, struct x86_decode *decode) +static void exec_or(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, |, SET_FLAGS_OSZAPC_LOGIC, true); env->eip += decode->len; } -static void exec_adc(struct CPUX86State *env, struct x86_decode *decode) +static void exec_adc(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, +get_CF(env)+, SET_FLAGS_OSZAPC_ADD, true); env->eip += decode->len; } -static void exec_sbb(struct CPUX86State *env, struct x86_decode *decode) +static void exec_sbb(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, -get_CF(env)-, SET_FLAGS_OSZAPC_SUB, true); env->eip += decode->len; } -static void exec_and(struct CPUX86State *env, struct x86_decode *decode) +static void exec_and(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, &, SET_FLAGS_OSZAPC_LOGIC, true); env->eip += decode->len; } -static void exec_sub(struct CPUX86State *env, struct x86_decode *decode) +static void exec_sub(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, true); env->eip += decode->len; } -static void exec_xor(struct CPUX86State *env, struct x86_decode *decode) +static void exec_xor(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, ^, SET_FLAGS_OSZAPC_LOGIC, true); env->eip += decode->len; } -static void exec_neg(struct CPUX86State *env, struct x86_decode *decode) +static void exec_neg(CPUX86State *env, struct x86_decode *decode) { /*EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false);*/ int32_t val; @@ -335,13 +335,13 @@ static void exec_neg(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static void exec_cmp(struct CPUX86State 
*env, struct x86_decode *decode) +static void exec_cmp(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false); env->eip += decode->len; } -static void exec_inc(struct CPUX86State *env, struct x86_decode *decode) +static void exec_inc(CPUX86State *env, struct x86_decode *decode) { decode->op[1].type = X86_VAR_IMMEDIATE; decode->op[1].val = 0; @@ -351,7 +351,7 @@ static void exec_inc(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static void exec_dec(struct CPUX86State *env, struct x86_decode *decode) +static void exec_dec(CPUX86State *env, struct x86_decode *decode) { decode->op[1].type = X86_VAR_IMMEDIATE; decode->op[1].val = 0; @@ -360,13 +360,13 @@ static void exec_dec(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static void exec_tst(struct CPUX86State *env, struct x86_decode *decode) +static void exec_tst(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, &, SET_FLAGS_OSZAPC_LOGIC, false); env->eip += decode->len; } -static void exec_not(struct CPUX86State *env, struct x86_decode *decode) +static void exec_not(CPUX86State *env, struct x86_decode *decode) { fetch_operands(env, decode, 1, true, false, false); @@ -375,7 +375,7 @@ static void exec_not(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -void exec_movzx(struct CPUX86State *env, struct x86_decode *decode) +void exec_movzx(CPUX86State *env, struct x86_decode *decode) { int src_op_size; int op_size = decode->operand_size; @@ -395,7 +395,7 @@ void exec_movzx(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static void exec_out(struct CPUX86State *env, struct x86_decode *decode) +static void exec_out(CPUX86State *env, struct x86_decode *decode) { switch (decode->opcode[0]) { case 0xe6: @@ -419,7 +419,7 @@ static void exec_out(struct CPUX86State *env, struct x86_decode *decode) env->eip += 
decode->len; } -static void exec_in(struct CPUX86State *env, struct x86_decode *decode) +static void exec_in(CPUX86State *env, struct x86_decode *decode) { target_ulong val = 0; switch (decode->opcode[0]) { @@ -455,7 +455,7 @@ static void exec_in(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static inline void string_increment_reg(struct CPUX86State *env, int reg, +static inline void string_increment_reg(CPUX86State *env, int reg, struct x86_decode *decode) { target_ulong val = read_reg(env, reg, decode->addressing_size); @@ -467,8 +467,8 @@ static inline void string_increment_reg(struct CPUX86State *env, int reg, write_reg(env, reg, val, decode->addressing_size); } -static inline void string_rep(struct CPUX86State *env, struct x86_decode *decode, - void (*func)(struct CPUX86State *env, +static inline void string_rep(CPUX86State *env, struct x86_decode *decode, + void (*func)(CPUX86State *env, struct x86_decode *ins), int rep) { target_ulong rcx = read_reg(env, R_ECX, decode->addressing_size); @@ -484,7 +484,7 @@ static inline void string_rep(struct CPUX86State *env, struct x86_decode *decode } } -static void exec_ins_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_ins_single(CPUX86State *env, struct x86_decode *decode) { target_ulong addr = linear_addr_size(env_cpu(env), RDI(env), decode->addressing_size, R_ES); @@ -497,7 +497,7 @@ static void exec_ins_single(struct CPUX86State *env, struct x86_decode *decode) string_increment_reg(env, R_EDI, decode); } -static void exec_ins(struct CPUX86State *env, struct x86_decode *decode) +static void exec_ins(CPUX86State *env, struct x86_decode *decode) { if (decode->rep) { string_rep(env, decode, exec_ins_single, 0); @@ -508,7 +508,7 @@ static void exec_ins(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static void exec_outs_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_outs_single(CPUX86State *env, 
struct x86_decode *decode) { target_ulong addr = decode_linear_addr(env, decode, RSI(env), R_DS); @@ -520,7 +520,7 @@ static void exec_outs_single(struct CPUX86State *env, struct x86_decode *decode) string_increment_reg(env, R_ESI, decode); } -static void exec_outs(struct CPUX86State *env, struct x86_decode *decode) +static void exec_outs(CPUX86State *env, struct x86_decode *decode) { if (decode->rep) { string_rep(env, decode, exec_outs_single, 0); @@ -531,7 +531,7 @@ static void exec_outs(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static void exec_movs_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_movs_single(CPUX86State *env, struct x86_decode *decode) { target_ulong src_addr; target_ulong dst_addr; @@ -548,7 +548,7 @@ static void exec_movs_single(struct CPUX86State *env, struct x86_decode *decode) string_increment_reg(env, R_EDI, decode); } -static void exec_movs(struct CPUX86State *env, struct x86_decode *decode) +static void exec_movs(CPUX86State *env, struct x86_decode *decode) { if (decode->rep) { string_rep(env, decode, exec_movs_single, 0); @@ -559,7 +559,7 @@ static void exec_movs(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static void exec_cmps_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_cmps_single(CPUX86State *env, struct x86_decode *decode) { target_ulong src_addr; target_ulong dst_addr; @@ -579,7 +579,7 @@ static void exec_cmps_single(struct CPUX86State *env, struct x86_decode *decode) string_increment_reg(env, R_EDI, decode); } -static void exec_cmps(struct CPUX86State *env, struct x86_decode *decode) +static void exec_cmps(CPUX86State *env, struct x86_decode *decode) { if (decode->rep) { string_rep(env, decode, exec_cmps_single, decode->rep); @@ -590,7 +590,7 @@ static void exec_cmps(struct CPUX86State *env, struct x86_decode *decode) } -static void exec_stos_single(struct CPUX86State *env, struct x86_decode 
*decode) +static void exec_stos_single(CPUX86State *env, struct x86_decode *decode) { target_ulong addr; target_ulong val; @@ -604,7 +604,7 @@ static void exec_stos_single(struct CPUX86State *env, struct x86_decode *decode) } -static void exec_stos(struct CPUX86State *env, struct x86_decode *decode) +static void exec_stos(CPUX86State *env, struct x86_decode *decode) { if (decode->rep) { string_rep(env, decode, exec_stos_single, 0); @@ -615,7 +615,7 @@ static void exec_stos(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static void exec_scas_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_scas_single(CPUX86State *env, struct x86_decode *decode) { target_ulong addr; @@ -628,7 +628,7 @@ static void exec_scas_single(struct CPUX86State *env, struct x86_decode *decode) string_increment_reg(env, R_EDI, decode); } -static void exec_scas(struct CPUX86State *env, struct x86_decode *decode) +static void exec_scas(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = R_EAX; @@ -641,7 +641,7 @@ static void exec_scas(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static void exec_lods_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_lods_single(CPUX86State *env, struct x86_decode *decode) { target_ulong addr; target_ulong val = 0; @@ -653,7 +653,7 @@ static void exec_lods_single(struct CPUX86State *env, struct x86_decode *decode) string_increment_reg(env, R_ESI, decode); } -static void exec_lods(struct CPUX86State *env, struct x86_decode *decode) +static void exec_lods(CPUX86State *env, struct x86_decode *decode) { if (decode->rep) { string_rep(env, decode, exec_lods_single, 0); @@ -760,7 +760,7 @@ void simulate_rdmsr(struct CPUState *cpu) RDX(env) = (uint32_t)(val >> 32); } -static void exec_rdmsr(struct CPUX86State *env, struct x86_decode *decode) +static void exec_rdmsr(CPUX86State *env, struct x86_decode 
*decode) { simulate_rdmsr(env_cpu(env)); env->eip += decode->len; @@ -855,7 +855,7 @@ void simulate_wrmsr(struct CPUState *cpu) printf("write msr %llx\n", RCX(cpu));*/ } -static void exec_wrmsr(struct CPUX86State *env, struct x86_decode *decode) +static void exec_wrmsr(CPUX86State *env, struct x86_decode *decode) { simulate_wrmsr(env_cpu(env)); env->eip += decode->len; @@ -865,7 +865,7 @@ static void exec_wrmsr(struct CPUX86State *env, struct x86_decode *decode) * flag: * 0 - bt, 1 - btc, 2 - bts, 3 - btr */ -static void do_bt(struct CPUX86State *env, struct x86_decode *decode, int flag) +static void do_bt(CPUX86State *env, struct x86_decode *decode, int flag) { int32_t displacement; uint8_t index; @@ -911,31 +911,31 @@ static void do_bt(struct CPUX86State *env, struct x86_decode *decode, int flag) set_CF(env, cf); } -static void exec_bt(struct CPUX86State *env, struct x86_decode *decode) +static void exec_bt(CPUX86State *env, struct x86_decode *decode) { do_bt(env, decode, 0); env->eip += decode->len; } -static void exec_btc(struct CPUX86State *env, struct x86_decode *decode) +static void exec_btc(CPUX86State *env, struct x86_decode *decode) { do_bt(env, decode, 1); env->eip += decode->len; } -static void exec_btr(struct CPUX86State *env, struct x86_decode *decode) +static void exec_btr(CPUX86State *env, struct x86_decode *decode) { do_bt(env, decode, 3); env->eip += decode->len; } -static void exec_bts(struct CPUX86State *env, struct x86_decode *decode) +static void exec_bts(CPUX86State *env, struct x86_decode *decode) { do_bt(env, decode, 2); env->eip += decode->len; } -void exec_shl(struct CPUX86State *env, struct x86_decode *decode) +void exec_shl(CPUX86State *env, struct x86_decode *decode) { uint8_t count; int of = 0, cf = 0; @@ -1022,7 +1022,7 @@ void exec_movsx(CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -void exec_ror(struct CPUX86State *env, struct x86_decode *decode) +void exec_ror(CPUX86State *env, struct x86_decode *decode) 
{ uint8_t count; @@ -1100,7 +1100,7 @@ void exec_ror(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -void exec_rol(struct CPUX86State *env, struct x86_decode *decode) +void exec_rol(CPUX86State *env, struct x86_decode *decode) { uint8_t count; @@ -1182,7 +1182,7 @@ void exec_rol(struct CPUX86State *env, struct x86_decode *decode) } -void exec_rcl(struct CPUX86State *env, struct x86_decode *decode) +void exec_rcl(CPUX86State *env, struct x86_decode *decode) { uint8_t count; int of = 0, cf = 0; @@ -1267,7 +1267,7 @@ void exec_rcl(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -void exec_rcr(struct CPUX86State *env, struct x86_decode *decode) +void exec_rcr(CPUX86State *env, struct x86_decode *decode) { uint8_t count; int of = 0, cf = 0; @@ -1342,7 +1342,7 @@ void exec_rcr(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static void exec_xchg(struct CPUX86State *env, struct x86_decode *decode) +static void exec_xchg(CPUX86State *env, struct x86_decode *decode) { fetch_operands(env, decode, 2, true, true, false); @@ -1354,7 +1354,7 @@ static void exec_xchg(struct CPUX86State *env, struct x86_decode *decode) env->eip += decode->len; } -static void exec_xadd(struct CPUX86State *env, struct x86_decode *decode) +static void exec_xadd(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, +, SET_FLAGS_OSZAPC_ADD, true); write_val_ext(env, decode->op[1].ptr, decode->op[0].val, @@ -1365,7 +1365,7 @@ static void exec_xadd(struct CPUX86State *env, struct x86_decode *decode) static struct cmd_handler { enum x86_decode_cmd cmd; - void (*handler)(struct CPUX86State *env, struct x86_decode *ins); + void (*handler)(CPUX86State *env, struct x86_decode *ins); } handlers[] = { {X86_DECODE_CMD_INVL, NULL,}, {X86_DECODE_CMD_MOV, exec_mov}, @@ -1465,7 +1465,7 @@ void store_regs(struct CPUState *cpu) macvm_set_rip(cpu, env->eip); } -bool exec_instruction(struct CPUX86State 
*env, struct x86_decode *ins) +bool exec_instruction(CPUX86State *env, struct x86_decode *ins) { /*if (hvf_vcpu_id(cpu)) printf("%d, %llx: exec_instruction %s\n", hvf_vcpu_id(cpu), env->eip, diff --git a/target/i386/hvf/x86_emu.h b/target/i386/hvf/x86_emu.h index 233f7b8daa..640da90b30 100644 --- a/target/i386/hvf/x86_emu.h +++ b/target/i386/hvf/x86_emu.h @@ -24,7 +24,7 @@ #include "cpu.h" void init_emu(void); -bool exec_instruction(struct CPUX86State *env, struct x86_decode *ins); +bool exec_instruction(CPUX86State *env, struct x86_decode *ins); void load_regs(struct CPUState *cpu); void store_regs(struct CPUState *cpu); @@ -36,15 +36,15 @@ target_ulong read_reg(CPUX86State *env, int reg, int size); void write_reg(CPUX86State *env, int reg, target_ulong val, int size); target_ulong read_val_from_reg(target_ulong reg_ptr, int size); void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size); -void write_val_ext(struct CPUX86State *env, target_ulong ptr, target_ulong val, int size); -uint8_t *read_mmio(struct CPUX86State *env, target_ulong ptr, int bytes); -target_ulong read_val_ext(struct CPUX86State *env, target_ulong ptr, int size); +void write_val_ext(CPUX86State *env, target_ulong ptr, target_ulong val, int size); +uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes); +target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size); -void exec_movzx(struct CPUX86State *env, struct x86_decode *decode); -void exec_shl(struct CPUX86State *env, struct x86_decode *decode); -void exec_movsx(struct CPUX86State *env, struct x86_decode *decode); -void exec_ror(struct CPUX86State *env, struct x86_decode *decode); -void exec_rol(struct CPUX86State *env, struct x86_decode *decode); -void exec_rcl(struct CPUX86State *env, struct x86_decode *decode); -void exec_rcr(struct CPUX86State *env, struct x86_decode *decode); +void exec_movzx(CPUX86State *env, struct x86_decode *decode); +void exec_shl(CPUX86State *env, struct x86_decode *decode); 
+void exec_movsx(CPUX86State *env, struct x86_decode *decode); +void exec_ror(CPUX86State *env, struct x86_decode *decode); +void exec_rol(CPUX86State *env, struct x86_decode *decode); +void exec_rcl(CPUX86State *env, struct x86_decode *decode); +void exec_rcr(CPUX86State *env, struct x86_decode *decode); #endif diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 2c8feb4a6f..83d0988302 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -37,6 +37,7 @@ #include "qemu/main-loop.h" #include "qemu/config-file.h" #include "qemu/error-report.h" +#include "qemu/memalign.h" #include "hw/i386/x86.h" #include "hw/i386/apic.h" #include "hw/i386/apic_internal.h" diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index 9af261eea3..b97d091a50 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -85,7 +85,7 @@ nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg) static void nvmm_set_registers(CPUState *cpu) { - struct CPUX86State *env = (CPUArchState *)cpu->env_ptr; + CPUX86State *env = cpu->env_ptr; struct nvmm_machine *mach = get_nvmm_mach(); struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu); struct nvmm_vcpu *vcpu = &qcpu->vcpu; @@ -222,7 +222,7 @@ nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg) static void nvmm_get_registers(CPUState *cpu) { - struct CPUX86State *env = (CPUArchState *)cpu->env_ptr; + CPUX86State *env = cpu->env_ptr; struct nvmm_machine *mach = get_nvmm_mach(); struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu); struct nvmm_vcpu *vcpu = &qcpu->vcpu; @@ -347,7 +347,7 @@ nvmm_get_registers(CPUState *cpu) static bool nvmm_can_take_int(CPUState *cpu) { - struct CPUX86State *env = (CPUArchState *)cpu->env_ptr; + CPUX86State *env = cpu->env_ptr; struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu); struct nvmm_vcpu *vcpu = &qcpu->vcpu; struct nvmm_machine *mach = get_nvmm_mach(); @@ -394,7 +394,7 @@ nvmm_can_take_nmi(CPUState *cpu) static void 
nvmm_vcpu_pre_run(CPUState *cpu) { - struct CPUX86State *env = (CPUArchState *)cpu->env_ptr; + CPUX86State *env = cpu->env_ptr; struct nvmm_machine *mach = get_nvmm_mach(); struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu); struct nvmm_vcpu *vcpu = &qcpu->vcpu; @@ -480,7 +480,7 @@ static void nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit) { struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu); - struct CPUX86State *env = (CPUArchState *)cpu->env_ptr; + CPUX86State *env = cpu->env_ptr; X86CPU *x86_cpu = X86_CPU(cpu); uint64_t tpr; @@ -652,7 +652,7 @@ static int nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu, struct nvmm_vcpu_exit *exit) { - struct CPUX86State *env = (CPUArchState *)cpu->env_ptr; + CPUX86State *env = cpu->env_ptr; int ret = 0; qemu_mutex_lock_iothread(); @@ -685,7 +685,7 @@ nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu) static int nvmm_vcpu_loop(CPUState *cpu) { - struct CPUX86State *env = (CPUArchState *)cpu->env_ptr; + CPUX86State *env = cpu->env_ptr; struct nvmm_machine *mach = get_nvmm_mach(); struct qemu_vcpu *qcpu = get_qemu_vcpu(cpu); struct nvmm_vcpu *vcpu = &qcpu->vcpu; diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c index 5ba739fbed..5627772e7c 100644 --- a/target/i386/tcg/sysemu/excp_helper.c +++ b/target/i386/tcg/sysemu/excp_helper.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "cpu.h" +#include "exec/exec-all.h" #include "tcg/helper-tcg.h" int get_pg_mode(CPUX86State *env) diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index 9ccaa054c4..3715c1e262 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -23,6 +23,7 @@ #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #include "exec/address-spaces.h" +#include "exec/exec-all.h" #include "tcg/helper-tcg.h" void helper_outb(CPUX86State *env, uint32_t port, uint32_t data) diff --git a/target/i386/whpx/whpx-accel-ops.c 
b/target/i386/whpx/whpx-accel-ops.c index 6bc47c5309..1d30e4e2ed 100644 --- a/target/i386/whpx/whpx-accel-ops.c +++ b/target/i386/whpx/whpx-accel-ops.c @@ -83,12 +83,18 @@ static void whpx_kick_vcpu_thread(CPUState *cpu) } } +static bool whpx_vcpu_thread_is_idle(CPUState *cpu) +{ + return !whpx_apic_in_platform(); +} + static void whpx_accel_ops_class_init(ObjectClass *oc, void *data) { AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); ops->create_vcpu_thread = whpx_start_vcpu_thread; ops->kick_vcpu_thread = whpx_kick_vcpu_thread; + ops->cpu_thread_is_idle = whpx_vcpu_thread_is_idle; ops->synchronize_post_reset = whpx_cpu_synchronize_post_reset; ops->synchronize_post_init = whpx_cpu_synchronize_post_init; diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index ef896da0a2..c7e25abf42 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -221,7 +221,7 @@ static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs) static int whpx_set_tsc(CPUState *cpu) { - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + CPUX86State *env = cpu->env_ptr; WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc; WHV_REGISTER_VALUE tsc_val; HRESULT hr; @@ -260,7 +260,7 @@ static void whpx_set_registers(CPUState *cpu, int level) { struct whpx_state *whpx = &whpx_global; struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + CPUX86State *env = cpu->env_ptr; X86CPU *x86_cpu = X86_CPU(cpu); struct whpx_register_set vcxt; HRESULT hr; @@ -428,7 +428,7 @@ static void whpx_set_registers(CPUState *cpu, int level) static int whpx_get_tsc(CPUState *cpu) { - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + CPUX86State *env = cpu->env_ptr; WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc; WHV_REGISTER_VALUE tsc_val; HRESULT hr; @@ -449,7 +449,7 @@ static void whpx_get_registers(CPUState *cpu) { struct whpx_state *whpx = &whpx_global; struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - struct 
CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + CPUX86State *env = cpu->env_ptr; X86CPU *x86_cpu = X86_CPU(cpu); struct whpx_register_set vcxt; uint64_t tpr, apic_base; @@ -760,7 +760,7 @@ static int whpx_handle_portio(CPUState *cpu, static int whpx_handle_halt(CPUState *cpu) { - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + CPUX86State *env = cpu->env_ptr; int ret = 0; qemu_mutex_lock_iothread(); @@ -781,7 +781,7 @@ static void whpx_vcpu_pre_run(CPUState *cpu) HRESULT hr; struct whpx_state *whpx = &whpx_global; struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + CPUX86State *env = cpu->env_ptr; X86CPU *x86_cpu = X86_CPU(cpu); int irq; uint8_t tpr; @@ -903,7 +903,7 @@ static void whpx_vcpu_pre_run(CPUState *cpu) static void whpx_vcpu_post_run(CPUState *cpu) { struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + CPUX86State *env = cpu->env_ptr; X86CPU *x86_cpu = X86_CPU(cpu); env->eflags = vcpu->exit_ctx.VpContext.Rflags; @@ -927,7 +927,7 @@ static void whpx_vcpu_post_run(CPUState *cpu) static void whpx_vcpu_process_async_events(CPUState *cpu) { - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + CPUX86State *env = cpu->env_ptr; X86CPU *x86_cpu = X86_CPU(cpu); struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); @@ -1333,7 +1333,7 @@ int whpx_init_vcpu(CPUState *cpu) struct whpx_state *whpx = &whpx_global; struct whpx_vcpu *vcpu = NULL; Error *local_error = NULL; - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); + CPUX86State *env = cpu->env_ptr; X86CPU *x86_cpu = X86_CPU(cpu); UINT64 freq = 0; int ret; diff --git a/target/m68k/cpu-qom.h b/target/m68k/cpu-qom.h index 1ceb160ecb..cd9687192c 100644 --- a/target/m68k/cpu-qom.h +++ b/target/m68k/cpu-qom.h @@ -25,8 +25,7 @@ #define TYPE_M68K_CPU "m68k-cpu" -OBJECT_DECLARE_TYPE(M68kCPU, M68kCPUClass, - M68K_CPU) +OBJECT_DECLARE_CPU_TYPE(M68kCPU, M68kCPUClass, M68K_CPU) /* * 
M68kCPUClass: diff --git a/target/m68k/cpu.h b/target/m68k/cpu.h index a3423729ef..872e8ce637 100644 --- a/target/m68k/cpu.h +++ b/target/m68k/cpu.h @@ -79,7 +79,7 @@ typedef CPU_LDoubleU FPReg; -typedef struct CPUM68KState { +typedef struct CPUArchState { uint32_t dregs[8]; uint32_t aregs[8]; uint32_t pc; @@ -156,7 +156,7 @@ typedef struct CPUM68KState { * * A Motorola 68k CPU. */ -struct M68kCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -574,9 +574,6 @@ void m68k_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, int mmu_idx, MemTxAttrs attrs, MemTxResult response, uintptr_t retaddr); -typedef CPUM68KState CPUArchState; -typedef M68kCPU ArchCPU; - #include "exec/cpu-all.h" /* TB flags */ diff --git a/target/microblaze/cpu-qom.h b/target/microblaze/cpu-qom.h index e520eefb12..255b39a45d 100644 --- a/target/microblaze/cpu-qom.h +++ b/target/microblaze/cpu-qom.h @@ -25,8 +25,7 @@ #define TYPE_MICROBLAZE_CPU "microblaze-cpu" -OBJECT_DECLARE_TYPE(MicroBlazeCPU, MicroBlazeCPUClass, - MICROBLAZE_CPU) +OBJECT_DECLARE_CPU_TYPE(MicroBlazeCPU, MicroBlazeCPUClass, MICROBLAZE_CPU) /** * MicroBlazeCPUClass: diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h index e9cd0b88de..0a0ce71b6a 100644 --- a/target/microblaze/cpu.h +++ b/target/microblaze/cpu.h @@ -24,7 +24,7 @@ #include "exec/cpu-defs.h" #include "fpu/softfloat-types.h" -typedef struct CPUMBState CPUMBState; +typedef struct CPUArchState CPUMBState; #if !defined(CONFIG_USER_ONLY) #include "mmu.h" #endif @@ -239,7 +239,7 @@ typedef struct CPUMBState CPUMBState; #define USE_NON_SECURE_M_AXI_DC_MASK 0x4 #define USE_NON_SECURE_M_AXI_IC_MASK 0x8 -struct CPUMBState { +struct CPUArchState { uint32_t bvalue; /* TCG temporary, only valid during a TB */ uint32_t btarget; /* Full resolved branch destination */ @@ -339,7 +339,7 @@ typedef struct { * * A MicroBlaze CPU. 
*/ -struct MicroBlazeCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; @@ -394,9 +394,6 @@ void mb_tcg_init(void); #define MMU_USER_IDX 2 /* See NB_MMU_MODES further up the file. */ -typedef CPUMBState CPUArchState; -typedef MicroBlazeCPU ArchCPU; - #include "exec/cpu-all.h" /* Ensure there is no overlap between the two masks. */ diff --git a/target/microblaze/mmu.h b/target/microblaze/mmu.h index b6b4b9ad60..1068bd2d52 100644 --- a/target/microblaze/mmu.h +++ b/target/microblaze/mmu.h @@ -20,6 +20,8 @@ #ifndef TARGET_MICROBLAZE_MMU_H #define TARGET_MICROBLAZE_MMU_H +#include "cpu.h" + #define MMU_R_PID 0 #define MMU_R_ZPR 1 #define MMU_R_TLBX 2 diff --git a/target/mips/cpu-qom.h b/target/mips/cpu-qom.h index dda0c911fa..e28b529607 100644 --- a/target/mips/cpu-qom.h +++ b/target/mips/cpu-qom.h @@ -29,8 +29,7 @@ #define TYPE_MIPS_CPU "mips-cpu" #endif -OBJECT_DECLARE_TYPE(MIPSCPU, MIPSCPUClass, - MIPS_CPU) +OBJECT_DECLARE_CPU_TYPE(MIPSCPU, MIPSCPUClass, MIPS_CPU) /** * MIPSCPUClass: diff --git a/target/mips/cpu.h b/target/mips/cpu.h index 56b1cbd091..09e98f64de 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -524,8 +524,7 @@ struct TCState { }; struct MIPSITUState; -typedef struct CPUMIPSState CPUMIPSState; -struct CPUMIPSState { +typedef struct CPUArchState { TCState active_tc; CPUMIPSFPUContext active_fpu; @@ -1161,7 +1160,7 @@ struct CPUMIPSState { QEMUTimer *timer; /* Internal timer */ target_ulong exception_base; /* ExceptionBase input to the core */ uint64_t cp0_count_ns; /* CP0_Count clock period (in nanoseconds) */ -}; +} CPUMIPSState; /** * MIPSCPU: @@ -1172,7 +1171,7 @@ struct CPUMIPSState { * * A MIPS CPU. 
*/ -struct MIPSCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -1218,9 +1217,6 @@ static inline int cpu_mmu_index(CPUMIPSState *env, bool ifetch) return hflags_mmu_index(env->hflags); } -typedef CPUMIPSState CPUArchState; -typedef MIPSCPU ArchCPU; - #include "exec/cpu-all.h" /* Exceptions */ diff --git a/target/mips/internal.h b/target/mips/internal.h index daddb05fd4..ac6e03e2f2 100644 --- a/target/mips/internal.h +++ b/target/mips/internal.h @@ -12,6 +12,7 @@ #ifdef CONFIG_TCG #include "tcg/tcg-internal.h" #endif +#include "cpu.h" /* * MMU types, the first four entries have the same layout as the @@ -133,14 +134,14 @@ struct r4k_tlb_t { struct CPUMIPSTLBContext { uint32_t nb_tlb; uint32_t tlb_in_use; - int (*map_address)(struct CPUMIPSState *env, hwaddr *physical, int *prot, + int (*map_address)(CPUMIPSState *env, hwaddr *physical, int *prot, target_ulong address, MMUAccessType access_type); - void (*helper_tlbwi)(struct CPUMIPSState *env); - void (*helper_tlbwr)(struct CPUMIPSState *env); - void (*helper_tlbp)(struct CPUMIPSState *env); - void (*helper_tlbr)(struct CPUMIPSState *env); - void (*helper_tlbinv)(struct CPUMIPSState *env); - void (*helper_tlbinvf)(struct CPUMIPSState *env); + void (*helper_tlbwi)(CPUMIPSState *env); + void (*helper_tlbwr)(CPUMIPSState *env); + void (*helper_tlbp)(CPUMIPSState *env); + void (*helper_tlbr)(CPUMIPSState *env); + void (*helper_tlbinv)(CPUMIPSState *env); + void (*helper_tlbinvf)(CPUMIPSState *env); union { struct { r4k_tlb_t tlb[MIPS_TLB_MAX]; diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c index 4cade61e93..6975ae4bdb 100644 --- a/target/nios2/cpu.c +++ b/target/nios2/cpu.c @@ -73,12 +73,9 @@ static void nios2_cpu_set_irq(void *opaque, int irq, int level) env->regs[CR_IPENDING] = deposit32(env->regs[CR_IPENDING], irq, 1, !!level); - env->irq_pending = env->regs[CR_IPENDING] & env->regs[CR_IENABLE]; - - if (env->irq_pending && (env->regs[CR_STATUS] & CR_STATUS_PIE)) { - 
env->irq_pending = 0; + if (env->regs[CR_IPENDING]) { cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } else if (!env->irq_pending) { + } else { cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } } @@ -134,7 +131,8 @@ static bool nios2_cpu_exec_interrupt(CPUState *cs, int interrupt_request) CPUNios2State *env = &cpu->env; if ((interrupt_request & CPU_INTERRUPT_HARD) && - (env->regs[CR_STATUS] & CR_STATUS_PIE)) { + (env->regs[CR_STATUS] & CR_STATUS_PIE) && + (env->regs[CR_IPENDING] & env->regs[CR_IENABLE])) { cs->exception_index = EXCP_IRQ; nios2_cpu_do_interrupt(cs); return true; diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h index d2ba0c5bbd..ca0f3420cd 100644 --- a/target/nios2/cpu.h +++ b/target/nios2/cpu.h @@ -25,15 +25,14 @@ #include "hw/core/cpu.h" #include "qom/object.h" -typedef struct CPUNios2State CPUNios2State; +typedef struct CPUArchState CPUNios2State; #if !defined(CONFIG_USER_ONLY) #include "mmu.h" #endif #define TYPE_NIOS2_CPU "nios2-cpu" -OBJECT_DECLARE_TYPE(Nios2CPU, Nios2CPUClass, - NIOS2_CPU) +OBJECT_DECLARE_CPU_TYPE(Nios2CPU, Nios2CPUClass, NIOS2_CPU) /** * Nios2CPUClass: @@ -155,12 +154,11 @@ struct Nios2CPUClass { #define CPU_INTERRUPT_NMI CPU_INTERRUPT_TGT_EXT_3 -struct CPUNios2State { +struct CPUArchState { uint32_t regs[NUM_CORE_REGS]; #if !defined(CONFIG_USER_ONLY) Nios2MMU mmu; - uint32_t irq_pending; #endif int error_code; }; @@ -171,7 +169,7 @@ struct CPUNios2State { * * A Nios2 CPU. 
*/ -struct Nios2CPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ diff --git a/target/nios2/helper.h b/target/nios2/helper.h index 6c8f0b5b35..a44ecfdf7a 100644 --- a/target/nios2/helper.h +++ b/target/nios2/helper.h @@ -21,7 +21,7 @@ DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32) #if !defined(CONFIG_USER_ONLY) -DEF_HELPER_2(mmu_read_debug, void, env, i32) -DEF_HELPER_3(mmu_write, void, env, i32, i32) -DEF_HELPER_1(check_interrupts, void, env) +DEF_HELPER_2(mmu_write_tlbacc, void, env, i32) +DEF_HELPER_2(mmu_write_tlbmisc, void, env, i32) +DEF_HELPER_2(mmu_write_pteaddr, void, env, i32) #endif diff --git a/target/nios2/meson.build b/target/nios2/meson.build index e643917db1..62b384702d 100644 --- a/target/nios2/meson.build +++ b/target/nios2/meson.build @@ -2,14 +2,13 @@ nios2_ss = ss.source_set() nios2_ss.add(files( 'cpu.c', 'helper.c', - 'mmu.c', 'nios2-semi.c', 'op_helper.c', 'translate.c', )) nios2_softmmu_ss = ss.source_set() -nios2_softmmu_ss.add(files('monitor.c')) +nios2_softmmu_ss.add(files('monitor.c', 'mmu.c')) target_arch += {'nios2': nios2_ss} target_softmmu_arch += {'nios2': nios2_softmmu_ss} diff --git a/target/nios2/mmu.c b/target/nios2/mmu.c index 2545c06761..4daab2a7ab 100644 --- a/target/nios2/mmu.c +++ b/target/nios2/mmu.c @@ -23,37 +23,9 @@ #include "cpu.h" #include "exec/exec-all.h" #include "mmu.h" +#include "exec/helper-proto.h" +#include "trace/trace-target_nios2.h" -#if !defined(CONFIG_USER_ONLY) - -/* Define this to enable MMU debug messages */ -/* #define DEBUG_MMU */ - -#ifdef DEBUG_MMU -#define MMU_LOG(x) x -#else -#define MMU_LOG(x) -#endif - -void mmu_read_debug(CPUNios2State *env, uint32_t rn) -{ - switch (rn) { - case CR_TLBACC: - MMU_LOG(qemu_log("TLBACC READ %08X\n", env->regs[rn])); - break; - - case CR_TLBMISC: - MMU_LOG(qemu_log("TLBMISC READ %08X\n", env->regs[rn])); - break; - - case CR_PTEADDR: - MMU_LOG(qemu_log("PTEADDR READ %08X\n", env->regs[rn])); - break; - - default: 
- break; - } -} /* rw - 0 = read, 1 = write, 2 = fetch. */ unsigned int mmu_translate(CPUNios2State *env, @@ -63,37 +35,26 @@ unsigned int mmu_translate(CPUNios2State *env, Nios2CPU *cpu = env_archcpu(env); int pid = (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >> 4; int vpn = vaddr >> 12; + int way, n_ways = cpu->tlb_num_ways; - MMU_LOG(qemu_log("mmu_translate vaddr %08X, pid %08X, vpn %08X\n", - vaddr, pid, vpn)); - - int way; - for (way = 0; way < cpu->tlb_num_ways; way++) { - - Nios2TLBEntry *entry = - &env->mmu.tlb[(way * cpu->tlb_num_ways) + - (vpn & env->mmu.tlb_entry_mask)]; - - MMU_LOG(qemu_log("TLB[%d] TAG %08X, VPN %08X\n", - (way * cpu->tlb_num_ways) + - (vpn & env->mmu.tlb_entry_mask), - entry->tag, (entry->tag >> 12))); + for (way = 0; way < n_ways; way++) { + uint32_t index = (way * n_ways) + (vpn & env->mmu.tlb_entry_mask); + Nios2TLBEntry *entry = &env->mmu.tlb[index]; if (((entry->tag >> 12) != vpn) || (((entry->tag & (1 << 11)) == 0) && ((entry->tag & ((1 << cpu->pid_num_bits) - 1)) != pid))) { + trace_nios2_mmu_translate_miss(vaddr, pid, index, entry->tag); continue; } + lu->vaddr = vaddr & TARGET_PAGE_MASK; lu->paddr = (entry->data & CR_TLBACC_PFN_MASK) << TARGET_PAGE_BITS; lu->prot = ((entry->data & CR_TLBACC_R) ? PAGE_READ : 0) | ((entry->data & CR_TLBACC_W) ? PAGE_WRITE : 0) | ((entry->data & CR_TLBACC_X) ? 
PAGE_EXEC : 0); - MMU_LOG(qemu_log("HIT TLB[%d] %08X %08X %08X\n", - (way * cpu->tlb_num_ways) + - (vpn & env->mmu.tlb_entry_mask), - lu->vaddr, lu->paddr, lu->prot)); + trace_nios2_mmu_translate_hit(vaddr, pid, index, lu->paddr, lu->prot); return 1; } return 0; @@ -104,141 +65,119 @@ static void mmu_flush_pid(CPUNios2State *env, uint32_t pid) CPUState *cs = env_cpu(env); Nios2CPU *cpu = env_archcpu(env); int idx; - MMU_LOG(qemu_log("TLB Flush PID %d\n", pid)); for (idx = 0; idx < cpu->tlb_num_entries; idx++) { Nios2TLBEntry *entry = &env->mmu.tlb[idx]; - MMU_LOG(qemu_log("TLB[%d] => %08X %08X\n", - idx, entry->tag, entry->data)); - if ((entry->tag & (1 << 10)) && (!(entry->tag & (1 << 11))) && ((entry->tag & ((1 << cpu->pid_num_bits) - 1)) == pid)) { uint32_t vaddr = entry->tag & TARGET_PAGE_MASK; - MMU_LOG(qemu_log("TLB Flush Page %08X\n", vaddr)); - + trace_nios2_mmu_flush_pid_hit(pid, idx, vaddr); tlb_flush_page(cs, vaddr); + } else { + trace_nios2_mmu_flush_pid_miss(pid, idx, entry->tag); } } } -void mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v) +void helper_mmu_write_tlbacc(CPUNios2State *env, uint32_t v) { CPUState *cs = env_cpu(env); Nios2CPU *cpu = env_archcpu(env); - MMU_LOG(qemu_log("mmu_write %08X = %08X\n", rn, v)); - - switch (rn) { - case CR_TLBACC: - MMU_LOG(qemu_log("TLBACC: IG %02X, FLAGS %c%c%c%c%c, PFN %05X\n", - v >> CR_TLBACC_IGN_SHIFT, - (v & CR_TLBACC_C) ? 'C' : '.', - (v & CR_TLBACC_R) ? 'R' : '.', - (v & CR_TLBACC_W) ? 'W' : '.', - (v & CR_TLBACC_X) ? 'X' : '.', - (v & CR_TLBACC_G) ? 'G' : '.', - v & CR_TLBACC_PFN_MASK)); - - /* if tlbmisc.WE == 1 then trigger a TLB write on writes to TLBACC */ - if (env->regs[CR_TLBMISC] & CR_TLBMISC_WR) { - int way = (env->regs[CR_TLBMISC] >> CR_TLBMISC_WAY_SHIFT); - int vpn = (env->mmu.pteaddr_wr & CR_PTEADDR_VPN_MASK) >> 2; - int pid = (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >> 4; - int g = (v & CR_TLBACC_G) ? 1 : 0; - int valid = ((vpn & CR_TLBACC_PFN_MASK) < 0xC0000) ? 
1 : 0; - Nios2TLBEntry *entry = - &env->mmu.tlb[(way * cpu->tlb_num_ways) + - (vpn & env->mmu.tlb_entry_mask)]; - uint32_t newTag = (vpn << 12) | (g << 11) | (valid << 10) | pid; - uint32_t newData = v & (CR_TLBACC_C | CR_TLBACC_R | CR_TLBACC_W | - CR_TLBACC_X | CR_TLBACC_PFN_MASK); - - if ((entry->tag != newTag) || (entry->data != newData)) { - if (entry->tag & (1 << 10)) { - /* Flush existing entry */ - MMU_LOG(qemu_log("TLB Flush Page (OLD) %08X\n", - entry->tag & TARGET_PAGE_MASK)); - tlb_flush_page(cs, entry->tag & TARGET_PAGE_MASK); - } - entry->tag = newTag; - entry->data = newData; - MMU_LOG(qemu_log("TLB[%d] = %08X %08X\n", - (way * cpu->tlb_num_ways) + - (vpn & env->mmu.tlb_entry_mask), - entry->tag, entry->data)); + trace_nios2_mmu_write_tlbacc(v >> CR_TLBACC_IGN_SHIFT, + (v & CR_TLBACC_C) ? 'C' : '.', + (v & CR_TLBACC_R) ? 'R' : '.', + (v & CR_TLBACC_W) ? 'W' : '.', + (v & CR_TLBACC_X) ? 'X' : '.', + (v & CR_TLBACC_G) ? 'G' : '.', + v & CR_TLBACC_PFN_MASK); + + /* if tlbmisc.WE == 1 then trigger a TLB write on writes to TLBACC */ + if (env->regs[CR_TLBMISC] & CR_TLBMISC_WR) { + int way = (env->regs[CR_TLBMISC] >> CR_TLBMISC_WAY_SHIFT); + int vpn = (env->mmu.pteaddr_wr & CR_PTEADDR_VPN_MASK) >> 2; + int pid = (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >> 4; + int g = (v & CR_TLBACC_G) ? 1 : 0; + int valid = ((vpn & CR_TLBACC_PFN_MASK) < 0xC0000) ? 
1 : 0; + Nios2TLBEntry *entry = + &env->mmu.tlb[(way * cpu->tlb_num_ways) + + (vpn & env->mmu.tlb_entry_mask)]; + uint32_t newTag = (vpn << 12) | (g << 11) | (valid << 10) | pid; + uint32_t newData = v & (CR_TLBACC_C | CR_TLBACC_R | CR_TLBACC_W | + CR_TLBACC_X | CR_TLBACC_PFN_MASK); + + if ((entry->tag != newTag) || (entry->data != newData)) { + if (entry->tag & (1 << 10)) { + /* Flush existing entry */ + tlb_flush_page(cs, entry->tag & TARGET_PAGE_MASK); } - /* Auto-increment tlbmisc.WAY */ - env->regs[CR_TLBMISC] = - (env->regs[CR_TLBMISC] & ~CR_TLBMISC_WAY_MASK) | - (((way + 1) & (cpu->tlb_num_ways - 1)) << - CR_TLBMISC_WAY_SHIFT); + entry->tag = newTag; + entry->data = newData; } + /* Auto-increment tlbmisc.WAY */ + env->regs[CR_TLBMISC] = + (env->regs[CR_TLBMISC] & ~CR_TLBMISC_WAY_MASK) | + (((way + 1) & (cpu->tlb_num_ways - 1)) << + CR_TLBMISC_WAY_SHIFT); + } - /* Writes to TLBACC don't change the read-back value */ - env->mmu.tlbacc_wr = v; - break; - - case CR_TLBMISC: - MMU_LOG(qemu_log("TLBMISC: WAY %X, FLAGS %c%c%c%c%c%c, PID %04X\n", - v >> CR_TLBMISC_WAY_SHIFT, - (v & CR_TLBMISC_RD) ? 'R' : '.', - (v & CR_TLBMISC_WR) ? 'W' : '.', - (v & CR_TLBMISC_DBL) ? '2' : '.', - (v & CR_TLBMISC_BAD) ? 'B' : '.', - (v & CR_TLBMISC_PERM) ? 'P' : '.', - (v & CR_TLBMISC_D) ? 
'D' : '.', - (v & CR_TLBMISC_PID_MASK) >> 4)); + /* Writes to TLBACC don't change the read-back value */ + env->mmu.tlbacc_wr = v; +} - if ((v & CR_TLBMISC_PID_MASK) != - (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK)) { - mmu_flush_pid(env, (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >> - CR_TLBMISC_PID_SHIFT); - } - /* if tlbmisc.RD == 1 then trigger a TLB read on writes to TLBMISC */ - if (v & CR_TLBMISC_RD) { - int way = (v >> CR_TLBMISC_WAY_SHIFT); - int vpn = (env->mmu.pteaddr_wr & CR_PTEADDR_VPN_MASK) >> 2; - Nios2TLBEntry *entry = - &env->mmu.tlb[(way * cpu->tlb_num_ways) + - (vpn & env->mmu.tlb_entry_mask)]; +void helper_mmu_write_tlbmisc(CPUNios2State *env, uint32_t v) +{ + Nios2CPU *cpu = env_archcpu(env); - env->regs[CR_TLBACC] &= CR_TLBACC_IGN_MASK; - env->regs[CR_TLBACC] |= entry->data; - env->regs[CR_TLBACC] |= (entry->tag & (1 << 11)) ? CR_TLBACC_G : 0; - env->regs[CR_TLBMISC] = - (v & ~CR_TLBMISC_PID_MASK) | - ((entry->tag & ((1 << cpu->pid_num_bits) - 1)) << - CR_TLBMISC_PID_SHIFT); - env->regs[CR_PTEADDR] &= ~CR_PTEADDR_VPN_MASK; - env->regs[CR_PTEADDR] |= (entry->tag >> 12) << CR_PTEADDR_VPN_SHIFT; - MMU_LOG(qemu_log("TLB READ way %d, vpn %05X, tag %08X, data %08X, " - "tlbacc %08X, tlbmisc %08X, pteaddr %08X\n", - way, vpn, entry->tag, entry->data, - env->regs[CR_TLBACC], env->regs[CR_TLBMISC], - env->regs[CR_PTEADDR])); - } else { - env->regs[CR_TLBMISC] = v; - } + trace_nios2_mmu_write_tlbmisc(v >> CR_TLBMISC_WAY_SHIFT, + (v & CR_TLBMISC_RD) ? 'R' : '.', + (v & CR_TLBMISC_WR) ? 'W' : '.', + (v & CR_TLBMISC_DBL) ? '2' : '.', + (v & CR_TLBMISC_BAD) ? 'B' : '.', + (v & CR_TLBMISC_PERM) ? 'P' : '.', + (v & CR_TLBMISC_D) ? 
'D' : '.', + (v & CR_TLBMISC_PID_MASK) >> 4); + + if ((v & CR_TLBMISC_PID_MASK) != + (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK)) { + mmu_flush_pid(env, (env->mmu.tlbmisc_wr & CR_TLBMISC_PID_MASK) >> + CR_TLBMISC_PID_SHIFT); + } + /* if tlbmisc.RD == 1 then trigger a TLB read on writes to TLBMISC */ + if (v & CR_TLBMISC_RD) { + int way = (v >> CR_TLBMISC_WAY_SHIFT); + int vpn = (env->mmu.pteaddr_wr & CR_PTEADDR_VPN_MASK) >> 2; + Nios2TLBEntry *entry = + &env->mmu.tlb[(way * cpu->tlb_num_ways) + + (vpn & env->mmu.tlb_entry_mask)]; - env->mmu.tlbmisc_wr = v; - break; + env->regs[CR_TLBACC] &= CR_TLBACC_IGN_MASK; + env->regs[CR_TLBACC] |= entry->data; + env->regs[CR_TLBACC] |= (entry->tag & (1 << 11)) ? CR_TLBACC_G : 0; + env->regs[CR_TLBMISC] = + (v & ~CR_TLBMISC_PID_MASK) | + ((entry->tag & ((1 << cpu->pid_num_bits) - 1)) << + CR_TLBMISC_PID_SHIFT); + env->regs[CR_PTEADDR] &= ~CR_PTEADDR_VPN_MASK; + env->regs[CR_PTEADDR] |= (entry->tag >> 12) << CR_PTEADDR_VPN_SHIFT; + } else { + env->regs[CR_TLBMISC] = v; + } - case CR_PTEADDR: - MMU_LOG(qemu_log("PTEADDR: PTBASE %03X, VPN %05X\n", - v >> CR_PTEADDR_PTBASE_SHIFT, - (v & CR_PTEADDR_VPN_MASK) >> CR_PTEADDR_VPN_SHIFT)); + env->mmu.tlbmisc_wr = v; +} - /* Writes to PTEADDR don't change the read-back VPN value */ - env->regs[CR_PTEADDR] = (v & ~CR_PTEADDR_VPN_MASK) | - (env->regs[CR_PTEADDR] & CR_PTEADDR_VPN_MASK); - env->mmu.pteaddr_wr = v; - break; +void helper_mmu_write_pteaddr(CPUNios2State *env, uint32_t v) +{ + trace_nios2_mmu_write_pteaddr(v >> CR_PTEADDR_PTBASE_SHIFT, + (v & CR_PTEADDR_VPN_MASK) >> CR_PTEADDR_VPN_SHIFT); - default: - break; - } + /* Writes to PTEADDR don't change the read-back VPN value */ + env->regs[CR_PTEADDR] = (v & ~CR_PTEADDR_VPN_MASK) | + (env->regs[CR_PTEADDR] & CR_PTEADDR_VPN_MASK); + env->mmu.pteaddr_wr = v; } void mmu_init(CPUNios2State *env) @@ -246,8 +185,6 @@ void mmu_init(CPUNios2State *env) Nios2CPU *cpu = env_archcpu(env); Nios2MMU *mmu = &env->mmu; - 
MMU_LOG(qemu_log("mmu_init\n")); - mmu->tlb_entry_mask = (cpu->tlb_num_entries / cpu->tlb_num_ways) - 1; mmu->tlb = g_new0(Nios2TLBEntry, cpu->tlb_num_entries); } @@ -277,5 +214,3 @@ void dump_mmu(CPUNios2State *env) (entry->data & CR_TLBACC_X) ? 'X' : '-'); } } - -#endif /* !CONFIG_USER_ONLY */ diff --git a/target/nios2/mmu.h b/target/nios2/mmu.h index 4f46fbb82e..5b085900fb 100644 --- a/target/nios2/mmu.h +++ b/target/nios2/mmu.h @@ -21,6 +21,8 @@ #ifndef NIOS2_MMU_H #define NIOS2_MMU_H +#include "cpu.h" + typedef struct Nios2TLBEntry { target_ulong tag; target_ulong data; @@ -44,7 +46,6 @@ void mmu_flip_um(CPUNios2State *env, unsigned int um); unsigned int mmu_translate(CPUNios2State *env, Nios2MMULookup *lu, target_ulong vaddr, int rw, int mmu_idx); -void mmu_read_debug(CPUNios2State *env, uint32_t rn); void mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v); void mmu_init(CPUNios2State *env); diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c index a59003855a..caa885f7b4 100644 --- a/target/nios2/op_helper.c +++ b/target/nios2/op_helper.c @@ -21,38 +21,9 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/cpu_ldst.h" #include "exec/exec-all.h" #include "qemu/main-loop.h" -#if !defined(CONFIG_USER_ONLY) -void helper_mmu_read_debug(CPUNios2State *env, uint32_t rn) -{ - mmu_read_debug(env, rn); -} - -void helper_mmu_write(CPUNios2State *env, uint32_t rn, uint32_t v) -{ - mmu_write(env, rn, v); -} - -static void nios2_check_interrupts(CPUNios2State *env) -{ - if (env->irq_pending && - (env->regs[CR_STATUS] & CR_STATUS_PIE)) { - env->irq_pending = 0; - cpu_interrupt(env_cpu(env), CPU_INTERRUPT_HARD); - } -} - -void helper_check_interrupts(CPUNios2State *env) -{ - qemu_mutex_lock_iothread(); - nios2_check_interrupts(env); - qemu_mutex_unlock_iothread(); -} -#endif /* !CONFIG_USER_ONLY */ - void helper_raise_exception(CPUNios2State *env, uint32_t index) { CPUState *cs = env_cpu(env); diff --git 
a/target/nios2/trace-events b/target/nios2/trace-events new file mode 100644 index 0000000000..07f1f0a5e7 --- /dev/null +++ b/target/nios2/trace-events @@ -0,0 +1,10 @@ +# mmu.c +nios2_mmu_translate_miss(uint32_t vaddr, uint32_t pid, uint32_t index, uint32_t tag) "mmu_translate: MISS vaddr=0x%08x pid=%u TLB[%u] tag=0x%08x" +nios2_mmu_translate_hit(uint32_t vaddr, uint32_t pid, uint32_t index, uint32_t paddr, uint32_t prot) "mmu_translate: HIT vaddr=0x%08x pid=%u TLB[%u] paddr=0x%08x prot=0x%x" + +nios2_mmu_flush_pid_miss(uint32_t pid, uint32_t index, uint32_t vaddr) "mmu_flush: MISS pid=%u TLB[%u] tag=0x%08x" +nios2_mmu_flush_pid_hit(uint32_t pid, uint32_t index, uint32_t vaddr) "mmu_flush: HIT pid=%u TLB[%u] vaddr=0x%08x" + +nios2_mmu_write_tlbacc(uint32_t ig, char c, char r, char w, char x, char g, uint32_t pfn) "mmu_write_tlbacc: ig=0x%02x flags=%c%c%c%c%c pfn=0x%08x" +nios2_mmu_write_tlbmisc(uint32_t way, char r, char w, char t, char b, char p, char d, uint32_t pid) "mmu_write_tlbmisc: way=0x%x flags=%c%c%c%c%c%c pid=%u" +nios2_mmu_write_pteaddr(uint32_t ptb, uint32_t vpn) "mmu_write_pteaddr: ptbase=0x%03x vpn=0x%05x" diff --git a/target/nios2/translate.c b/target/nios2/translate.c index f9abc2fdd2..f89271dbed 100644 --- a/target/nios2/translate.c +++ b/target/nios2/translate.c @@ -447,28 +447,24 @@ static void rdctl(DisasContext *dc, uint32_t code, uint32_t flags) gen_check_supervisor(dc); - switch (instr.imm5 + CR_BASE) { - case CR_PTEADDR: - case CR_TLBACC: - case CR_TLBMISC: - { -#if !defined(CONFIG_USER_ONLY) - if (likely(instr.c != R_ZERO)) { - tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]); -#ifdef DEBUG_MMU - TCGv_i32 tmp = tcg_const_i32(instr.imm5 + CR_BASE); - gen_helper_mmu_read_debug(cpu_R[instr.c], cpu_env, tmp); - tcg_temp_free_i32(tmp); -#endif - } -#endif - break; + if (unlikely(instr.c == R_ZERO)) { + return; } + switch (instr.imm5 + CR_BASE) { + case CR_IPENDING: + /* + * The value of the ipending register is synthetic. 
+ * In hw, this is the AND of a set of hardware irq lines + * with the ienable register. In qemu, we re-use the space + * of CR_IPENDING to store the set of irq lines, and so we + * must perform the AND here, and anywhere else we need the + * guest value of ipending. + */ + tcg_gen_and_tl(cpu_R[instr.c], cpu_R[CR_IPENDING], cpu_R[CR_IENABLE]); + break; default: - if (likely(instr.c != R_ZERO)) { - tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]); - } + tcg_gen_mov_tl(cpu_R[instr.c], cpu_R[instr.imm5 + CR_BASE]); break; } } @@ -476,36 +472,33 @@ static void rdctl(DisasContext *dc, uint32_t code, uint32_t flags) /* ctlN <- rA */ static void wrctl(DisasContext *dc, uint32_t code, uint32_t flags) { - R_TYPE(instr, code); - gen_check_supervisor(dc); +#ifndef CONFIG_USER_ONLY + R_TYPE(instr, code); + TCGv v = load_gpr(dc, instr.a); + switch (instr.imm5 + CR_BASE) { case CR_PTEADDR: + gen_helper_mmu_write_pteaddr(cpu_env, v); + break; case CR_TLBACC: + gen_helper_mmu_write_tlbacc(cpu_env, v); + break; case CR_TLBMISC: - { -#if !defined(CONFIG_USER_ONLY) - TCGv_i32 tmp = tcg_const_i32(instr.imm5 + CR_BASE); - gen_helper_mmu_write(cpu_env, tmp, load_gpr(dc, instr.a)); - tcg_temp_free_i32(tmp); -#endif + gen_helper_mmu_write_tlbmisc(cpu_env, v); break; - } - - default: - tcg_gen_mov_tl(cpu_R[instr.imm5 + CR_BASE], load_gpr(dc, instr.a)); + case CR_IPENDING: + /* ipending is read only, writes ignored. */ break; - } - - /* If interrupts were enabled using WRCTL, trigger them. */ -#if !defined(CONFIG_USER_ONLY) - if ((instr.imm5 + CR_BASE) == CR_STATUS) { - if (tb_cflags(dc->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } - gen_helper_check_interrupts(cpu_env); + case CR_STATUS: + case CR_IENABLE: + /* If interrupts were enabled using WRCTL, trigger them. 
*/ dc->base.is_jmp = DISAS_UPDATE; + /* fall through */ + default: + tcg_gen_mov_tl(cpu_R[instr.imm5 + CR_BASE], v); + break; } #endif } diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h index ee069b080c..bdf29d2dc4 100644 --- a/target/openrisc/cpu.h +++ b/target/openrisc/cpu.h @@ -24,13 +24,9 @@ #include "hw/core/cpu.h" #include "qom/object.h" -/* cpu_openrisc_map_address_* in CPUOpenRISCTLBContext need this decl. */ -struct OpenRISCCPU; - #define TYPE_OPENRISC_CPU "or1k-cpu" -OBJECT_DECLARE_TYPE(OpenRISCCPU, OpenRISCCPUClass, - OPENRISC_CPU) +OBJECT_DECLARE_CPU_TYPE(OpenRISCCPU, OpenRISCCPUClass, OPENRISC_CPU) /** * OpenRISCCPUClass: @@ -231,18 +227,18 @@ typedef struct CPUOpenRISCTLBContext { OpenRISCTLBEntry itlb[TLB_SIZE]; OpenRISCTLBEntry dtlb[TLB_SIZE]; - int (*cpu_openrisc_map_address_code)(struct OpenRISCCPU *cpu, + int (*cpu_openrisc_map_address_code)(OpenRISCCPU *cpu, hwaddr *physical, int *prot, target_ulong address, int rw); - int (*cpu_openrisc_map_address_data)(struct OpenRISCCPU *cpu, + int (*cpu_openrisc_map_address_data)(OpenRISCCPU *cpu, hwaddr *physical, int *prot, target_ulong address, int rw); } CPUOpenRISCTLBContext; #endif -typedef struct CPUOpenRISCState { +typedef struct CPUArchState { target_ulong shadow_gpr[16][32]; /* Shadow registers */ target_ulong pc; /* Program counter */ @@ -301,7 +297,7 @@ typedef struct CPUOpenRISCState { * * A OpenRISC CPU. 
*/ -struct OpenRISCCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -348,9 +344,6 @@ void cpu_openrisc_count_stop(OpenRISCCPU *cpu); #define OPENRISC_CPU_TYPE_NAME(model) model OPENRISC_CPU_TYPE_SUFFIX #define CPU_RESOLVING_TYPE TYPE_OPENRISC_CPU -typedef CPUOpenRISCState CPUArchState; -typedef OpenRISCCPU ArchCPU; - #include "exec/cpu-all.h" #define TB_FLAGS_SM SR_SM diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h index 98facee9fa..ad7e3c3db9 100644 --- a/target/ppc/cpu-qom.h +++ b/target/ppc/cpu-qom.h @@ -29,10 +29,9 @@ #define TYPE_POWERPC_CPU "powerpc-cpu" #endif -OBJECT_DECLARE_TYPE(PowerPCCPU, PowerPCCPUClass, - POWERPC_CPU) +OBJECT_DECLARE_CPU_TYPE(PowerPCCPU, PowerPCCPUClass, POWERPC_CPU) -typedef struct CPUPPCState CPUPPCState; +typedef struct CPUArchState CPUPPCState; typedef struct ppc_tb_t ppc_tb_t; typedef struct ppc_dcr_t ppc_dcr_t; diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 1b687521c7..047b24ba50 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1077,7 +1077,7 @@ struct ppc_radix_page_info { #define PPC_CPU_OPCODES_LEN 0x40 #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20 -struct CPUPPCState { +struct CPUArchState { /* Most commonly used resources during translated code execution first */ target_ulong gpr[32]; /* general purpose registers */ target_ulong gprh[32]; /* storage for GPR MSB, used by the SPE extension */ @@ -1275,7 +1275,7 @@ typedef struct PPCVirtualHypervisorClass PPCVirtualHypervisorClass; * * A PowerPC CPU. 
*/ -struct PowerPCCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -1477,9 +1477,6 @@ void ppc_compat_add_property(Object *obj, const char *name, uint32_t *compat_pvr, const char *basedesc); #endif /* defined(TARGET_PPC64) */ -typedef CPUPPCState CPUArchState; -typedef PowerPCCPU ArchCPU; - #include "exec/cpu-all.h" /*****************************************************************************/ diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 8f970288f5..bd12db960a 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -2156,9 +2156,8 @@ VSX_TSQRT(xvtsqrtsp, 4, float32, VsrW(i), -126, 23) * maddflgs - flags for the float*muladd routine that control the * various forms (madd, msub, nmadd, nmsub) * sfprf - set FPRF - * r2sp - round intermediate double precision result to single precision */ -#define VSX_MADD(op, nels, tp, fld, maddflgs, sfprf, r2sp) \ +#define VSX_MADD(op, nels, tp, fld, maddflgs, sfprf) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ ppc_vsr_t *s1, ppc_vsr_t *s2, ppc_vsr_t *s3) \ { \ @@ -2170,20 +2169,7 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ for (i = 0; i < nels; i++) { \ float_status tstat = env->fp_status; \ set_float_exception_flags(0, &tstat); \ - if (r2sp && (tstat.float_rounding_mode == float_round_nearest_even)) {\ - /* \ - * Avoid double rounding errors by rounding the intermediate \ - * result to odd. 
\ - */ \ - set_float_rounding_mode(float_round_to_zero, &tstat); \ - t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld, \ - maddflgs, &tstat); \ - t.fld |= (get_float_exception_flags(&tstat) & \ - float_flag_inexact) != 0; \ - } else { \ - t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld, \ - maddflgs, &tstat); \ - } \ + t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld, maddflgs, &tstat); \ env->fp_status.float_exception_flags |= tstat.float_exception_flags; \ \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ @@ -2191,10 +2177,6 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ sfprf, GETPC()); \ } \ \ - if (r2sp) { \ - t.fld = do_frsp(env, t.fld, GETPC()); \ - } \ - \ if (sfprf) { \ helper_compute_fprf_float64(env, t.fld); \ } \ @@ -2203,24 +2185,24 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ do_float_check_status(env, GETPC()); \ } -VSX_MADD(XSMADDDP, 1, float64, VsrD(0), MADD_FLGS, 1, 0) -VSX_MADD(XSMSUBDP, 1, float64, VsrD(0), MSUB_FLGS, 1, 0) -VSX_MADD(XSNMADDDP, 1, float64, VsrD(0), NMADD_FLGS, 1, 0) -VSX_MADD(XSNMSUBDP, 1, float64, VsrD(0), NMSUB_FLGS, 1, 0) -VSX_MADD(XSMADDSP, 1, float64, VsrD(0), MADD_FLGS, 1, 1) -VSX_MADD(XSMSUBSP, 1, float64, VsrD(0), MSUB_FLGS, 1, 1) -VSX_MADD(XSNMADDSP, 1, float64, VsrD(0), NMADD_FLGS, 1, 1) -VSX_MADD(XSNMSUBSP, 1, float64, VsrD(0), NMSUB_FLGS, 1, 1) +VSX_MADD(XSMADDDP, 1, float64, VsrD(0), MADD_FLGS, 1) +VSX_MADD(XSMSUBDP, 1, float64, VsrD(0), MSUB_FLGS, 1) +VSX_MADD(XSNMADDDP, 1, float64, VsrD(0), NMADD_FLGS, 1) +VSX_MADD(XSNMSUBDP, 1, float64, VsrD(0), NMSUB_FLGS, 1) +VSX_MADD(XSMADDSP, 1, float64r32, VsrD(0), MADD_FLGS, 1) +VSX_MADD(XSMSUBSP, 1, float64r32, VsrD(0), MSUB_FLGS, 1) +VSX_MADD(XSNMADDSP, 1, float64r32, VsrD(0), NMADD_FLGS, 1) +VSX_MADD(XSNMSUBSP, 1, float64r32, VsrD(0), NMSUB_FLGS, 1) -VSX_MADD(xvmadddp, 2, float64, VsrD(i), MADD_FLGS, 0, 0) -VSX_MADD(xvmsubdp, 2, float64, VsrD(i), MSUB_FLGS, 0, 0) -VSX_MADD(xvnmadddp, 2, float64, VsrD(i), NMADD_FLGS, 0, 0) 
-VSX_MADD(xvnmsubdp, 2, float64, VsrD(i), NMSUB_FLGS, 0, 0) +VSX_MADD(xvmadddp, 2, float64, VsrD(i), MADD_FLGS, 0) +VSX_MADD(xvmsubdp, 2, float64, VsrD(i), MSUB_FLGS, 0) +VSX_MADD(xvnmadddp, 2, float64, VsrD(i), NMADD_FLGS, 0) +VSX_MADD(xvnmsubdp, 2, float64, VsrD(i), NMSUB_FLGS, 0) -VSX_MADD(xvmaddsp, 4, float32, VsrW(i), MADD_FLGS, 0, 0) -VSX_MADD(xvmsubsp, 4, float32, VsrW(i), MSUB_FLGS, 0, 0) -VSX_MADD(xvnmaddsp, 4, float32, VsrW(i), NMADD_FLGS, 0, 0) -VSX_MADD(xvnmsubsp, 4, float32, VsrW(i), NMSUB_FLGS, 0, 0) +VSX_MADD(xvmaddsp, 4, float32, VsrW(i), MADD_FLGS, 0) +VSX_MADD(xvmsubsp, 4, float32, VsrW(i), MSUB_FLGS, 0) +VSX_MADD(xvnmaddsp, 4, float32, VsrW(i), NMADD_FLGS, 0) +VSX_MADD(xvnmsubsp, 4, float32, VsrW(i), NMSUB_FLGS, 0) /* * VSX_MADDQ - VSX floating point quad-precision muliply/add @@ -2540,6 +2522,8 @@ void helper_##name(CPUPPCState *env, \ ppc_vsr_t t = { }; \ bool first; \ \ + helper_reset_fpstatus(env); \ + \ if (max) { \ first = tp##_le_quiet(xb->fld, xa->fld, &env->fp_status); \ } else { \ @@ -2790,6 +2774,8 @@ void helper_XVCVSPBF16(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) ppc_vsr_t t = { }; int i, status; + helper_reset_fpstatus(env); + for (i = 0; i < 4; i++) { t.VsrH(2 * i + 1) = float32_to_bfloat16(xb->VsrW(i), &env->fp_status); } diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index b2b17bb1ca..492f34c499 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1072,7 +1072,7 @@ void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) *r = result; } -#define XXGENPCV(NAME, SZ) \ +#define XXGENPCV_BE_EXP(NAME, SZ) \ void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ { \ ppc_vsr_t tmp; \ @@ -1093,8 +1093,9 @@ void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ } \ \ *t = tmp; \ -} \ - \ +} + +#define XXGENPCV_BE_COMP(NAME, SZ) \ void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ { \ ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ @@ -1111,8 
+1112,9 @@ void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ } \ \ *t = tmp; \ -} \ - \ +} + +#define XXGENPCV_LE_EXP(NAME, SZ) \ void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ { \ ppc_vsr_t tmp; \ @@ -1135,8 +1137,9 @@ void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \ } \ \ *t = tmp; \ -} \ - \ +} + +#define XXGENPCV_LE_COMP(NAME, SZ) \ void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ { \ ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ @@ -1157,10 +1160,21 @@ void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\ *t = tmp; \ } +#define XXGENPCV(NAME, SZ) \ + XXGENPCV_BE_EXP(NAME, SZ) \ + XXGENPCV_BE_COMP(NAME, SZ) \ + XXGENPCV_LE_EXP(NAME, SZ) \ + XXGENPCV_LE_COMP(NAME, SZ) \ + XXGENPCV(XXGENPCVBM, 1) XXGENPCV(XXGENPCVHM, 2) XXGENPCV(XXGENPCVWM, 4) XXGENPCV(XXGENPCVDM, 8) + +#undef XXGENPCV_BE_EXP +#undef XXGENPCV_BE_COMP +#undef XXGENPCV_LE_EXP +#undef XXGENPCV_LE_COMP #undef XXGENPCV #if defined(HOST_WORDS_BIGENDIAN) diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index f91bee839d..6101bca3fd 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -1088,10 +1088,8 @@ static void do_vrlq_mask(TCGv_i64 mh, TCGv_i64 ml, TCGv_i64 b, TCGv_i64 e) tcg_gen_or_i64(tl, t1, tl); /* t = t >> 1 */ - tcg_gen_shli_i64(t0, th, 63); - tcg_gen_shri_i64(tl, tl, 1); + tcg_gen_extract2_i64(tl, tl, th, 1); tcg_gen_shri_i64(th, th, 1); - tcg_gen_or_i64(tl, t0, tl); /* m = m ^ t */ tcg_gen_xor_i64(mh, mh, th); @@ -1148,10 +1146,8 @@ static bool do_vector_rotl_quad(DisasContext *ctx, arg_VX *a, bool mask, tcg_gen_or_i64(t1, ah, t1); if (mask || insert) { - tcg_gen_shri_i64(n, vrb, 8); - tcg_gen_shri_i64(vrb, vrb, 16); - tcg_gen_andi_i64(n, n, 0x7f); - tcg_gen_andi_i64(vrb, vrb, 0x7f); + tcg_gen_extract_i64(n, vrb, 8, 7); + tcg_gen_extract_i64(vrb, vrb, 16, 7); do_vrlq_mask(ah, al, vrb, n); @@ -1161,10 +1157,8 @@ static 
bool do_vector_rotl_quad(DisasContext *ctx, arg_VX *a, bool mask, if (insert) { get_avr64(n, a->vrt, true); get_avr64(vrb, a->vrt, false); - tcg_gen_not_i64(ah, ah); - tcg_gen_not_i64(al, al); - tcg_gen_and_i64(n, n, ah); - tcg_gen_and_i64(vrb, vrb, al); + tcg_gen_andc_i64(n, n, ah); + tcg_gen_andc_i64(vrb, vrb, al); tcg_gen_or_i64(t0, t0, n); tcg_gen_or_i64(t1, t1, vrb); } @@ -3141,14 +3135,14 @@ static bool trans_VMULLD(DisasContext *ctx, arg_VX *a) return true; } -TRANS_FLAGS2(ALTIVEC_207, VMULESB, do_vx_helper, gen_helper_VMULESB) -TRANS_FLAGS2(ALTIVEC_207, VMULOSB, do_vx_helper, gen_helper_VMULOSB) -TRANS_FLAGS2(ALTIVEC_207, VMULEUB, do_vx_helper, gen_helper_VMULEUB) -TRANS_FLAGS2(ALTIVEC_207, VMULOUB, do_vx_helper, gen_helper_VMULOUB) -TRANS_FLAGS2(ALTIVEC_207, VMULESH, do_vx_helper, gen_helper_VMULESH) -TRANS_FLAGS2(ALTIVEC_207, VMULOSH, do_vx_helper, gen_helper_VMULOSH) -TRANS_FLAGS2(ALTIVEC_207, VMULEUH, do_vx_helper, gen_helper_VMULEUH) -TRANS_FLAGS2(ALTIVEC_207, VMULOUH, do_vx_helper, gen_helper_VMULOUH) +TRANS_FLAGS(ALTIVEC, VMULESB, do_vx_helper, gen_helper_VMULESB) +TRANS_FLAGS(ALTIVEC, VMULOSB, do_vx_helper, gen_helper_VMULOSB) +TRANS_FLAGS(ALTIVEC, VMULEUB, do_vx_helper, gen_helper_VMULEUB) +TRANS_FLAGS(ALTIVEC, VMULOUB, do_vx_helper, gen_helper_VMULOUB) +TRANS_FLAGS(ALTIVEC, VMULESH, do_vx_helper, gen_helper_VMULESH) +TRANS_FLAGS(ALTIVEC, VMULOSH, do_vx_helper, gen_helper_VMULOSH) +TRANS_FLAGS(ALTIVEC, VMULEUH, do_vx_helper, gen_helper_VMULEUH) +TRANS_FLAGS(ALTIVEC, VMULOUH, do_vx_helper, gen_helper_VMULOUH) TRANS_FLAGS2(ALTIVEC_207, VMULESW, do_vx_helper, gen_helper_VMULESW) TRANS_FLAGS2(ALTIVEC_207, VMULOSW, do_vx_helper, gen_helper_VMULOSW) TRANS_FLAGS2(ALTIVEC_207, VMULEUW, do_vx_helper, gen_helper_VMULEUW) @@ -3162,19 +3156,16 @@ static void do_vx_vmulhw_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, bool sign) { TCGv_i64 hh, lh, temp; - uint64_t c; hh = tcg_temp_new_i64(); lh = tcg_temp_new_i64(); temp = tcg_temp_new_i64(); - c = 0xFFFFFFFF; - if 
(sign) { tcg_gen_ext32s_i64(lh, a); tcg_gen_ext32s_i64(temp, b); } else { - tcg_gen_andi_i64(lh, a, c); - tcg_gen_andi_i64(temp, b, c); + tcg_gen_ext32u_i64(lh, a); + tcg_gen_ext32u_i64(temp, b); } tcg_gen_mul_i64(lh, lh, temp); @@ -3188,8 +3179,7 @@ static void do_vx_vmulhw_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, bool sign) tcg_gen_mul_i64(hh, hh, temp); tcg_gen_shri_i64(lh, lh, 32); - tcg_gen_andi_i64(hh, hh, c << 32); - tcg_gen_or_i64(t, hh, lh); + tcg_gen_deposit_i64(t, hh, lh, 0, 32); tcg_temp_free_i64(hh); tcg_temp_free_i64(lh); diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 2ffeab5287..48a97b2d7e 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -1204,43 +1204,44 @@ static bool trans_XXPERMX(DisasContext *ctx, arg_8RR_XX4_uim3 *a) return true; } -#define XXGENPCV(NAME) \ -static bool trans_##NAME(DisasContext *ctx, arg_X_imm5 *a) \ -{ \ - TCGv_ptr xt, vrb; \ - \ - REQUIRE_INSNS_FLAGS2(ctx, ISA310); \ - REQUIRE_VSX(ctx); \ - \ - if (a->imm & ~0x3) { \ - gen_invalid(ctx); \ - return true; \ - } \ - \ - xt = gen_vsr_ptr(a->xt); \ - vrb = gen_avr_ptr(a->vrb); \ - \ - switch (a->imm) { \ - case 0b00000: /* Big-Endian expansion */ \ - glue(gen_helper_, glue(NAME, _be_exp))(xt, vrb); \ - break; \ - case 0b00001: /* Big-Endian compression */ \ - glue(gen_helper_, glue(NAME, _be_comp))(xt, vrb); \ - break; \ - case 0b00010: /* Little-Endian expansion */ \ - glue(gen_helper_, glue(NAME, _le_exp))(xt, vrb); \ - break; \ - case 0b00011: /* Little-Endian compression */ \ - glue(gen_helper_, glue(NAME, _le_comp))(xt, vrb); \ - break; \ - } \ - \ - tcg_temp_free_ptr(xt); \ - tcg_temp_free_ptr(vrb); \ - \ - return true; \ +typedef void (*xxgenpcv_genfn)(TCGv_ptr, TCGv_ptr); + +static bool do_xxgenpcv(DisasContext *ctx, arg_X_imm5 *a, + const xxgenpcv_genfn fn[4]) +{ + TCGv_ptr xt, vrb; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + + if (a->imm & ~0x3) { + 
gen_invalid(ctx); + return true; + } + + xt = gen_vsr_ptr(a->xt); + vrb = gen_avr_ptr(a->vrb); + + fn[a->imm](xt, vrb); + + tcg_temp_free_ptr(xt); + tcg_temp_free_ptr(vrb); + + return true; } +#define XXGENPCV(NAME) \ + static bool trans_##NAME(DisasContext *ctx, arg_X_imm5 *a) \ + { \ + static const xxgenpcv_genfn fn[4] = { \ + gen_helper_##NAME##_be_exp, \ + gen_helper_##NAME##_be_comp, \ + gen_helper_##NAME##_le_exp, \ + gen_helper_##NAME##_le_comp, \ + }; \ + return do_xxgenpcv(ctx, a, fn); \ + } + XXGENPCV(XXGENPCVBM) XXGENPCV(XXGENPCVHM) XXGENPCV(XXGENPCVWM) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index b0a40b83e7..ddda4906ff 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -587,6 +587,11 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) cpu->cfg.ext_d = true; } + if (cpu->cfg.ext_zdinx || cpu->cfg.ext_zhinx || + cpu->cfg.ext_zhinxmin) { + cpu->cfg.ext_zfinx = true; + } + /* Set the ISA extensions, checks should have happened above */ if (cpu->cfg.ext_i) { ext |= RVI; @@ -665,6 +670,13 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) if (cpu->cfg.ext_j) { ext |= RVJ; } + if (cpu->cfg.ext_zfinx && ((ext & (RVF | RVD)) || cpu->cfg.ext_zfh || + cpu->cfg.ext_zfhmin)) { + error_setg(errp, + "'Zfinx' cannot be supported together with 'F', 'D', 'Zfh'," + " 'Zfhmin'"); + return; + } set_misa(env, env->misa_mxl, ext); } @@ -783,6 +795,11 @@ static Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("zbc", RISCVCPU, cfg.ext_zbc, true), DEFINE_PROP_BOOL("zbs", RISCVCPU, cfg.ext_zbs, true), + DEFINE_PROP_BOOL("zdinx", RISCVCPU, cfg.ext_zdinx, false), + DEFINE_PROP_BOOL("zfinx", RISCVCPU, cfg.ext_zfinx, false), + DEFINE_PROP_BOOL("zhinx", RISCVCPU, cfg.ext_zhinx, false), + DEFINE_PROP_BOOL("zhinxmin", RISCVCPU, cfg.ext_zhinxmin, false), + /* Vendor-specific custom extensions */ DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false), diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 
8183fb86d5..c069fe85fa 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -98,7 +98,7 @@ enum { #define MAX_RISCV_PMPS (16) -typedef struct CPURISCVState CPURISCVState; +typedef struct CPUArchState CPURISCVState; #if !defined(CONFIG_USER_ONLY) #include "pmp.h" @@ -113,7 +113,7 @@ FIELD(VTYPE, VMA, 7, 1) FIELD(VTYPE, VEDIV, 8, 2) FIELD(VTYPE, RESERVED, 10, sizeof(target_ulong) * 8 - 11) -struct CPURISCVState { +struct CPUArchState { target_ulong gpr[32]; target_ulong gprh[32]; /* 64 top bits of the 128-bit registers */ uint64_t fpr[32]; /* assume both F and D extensions */ @@ -320,8 +320,7 @@ struct CPURISCVState { uint64_t kvm_timer_frequency; }; -OBJECT_DECLARE_TYPE(RISCVCPU, RISCVCPUClass, - RISCV_CPU) +OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU) /** * RISCVCPUClass: @@ -362,8 +361,12 @@ struct RISCVCPUConfig { bool ext_svinval; bool ext_svnapot; bool ext_svpbmt; + bool ext_zdinx; bool ext_zfh; bool ext_zfhmin; + bool ext_zfinx; + bool ext_zhinx; + bool ext_zhinxmin; bool ext_zve32f; bool ext_zve64f; @@ -391,7 +394,7 @@ typedef struct RISCVCPUConfig RISCVCPUConfig; * * A RISCV CPU. 
*/ -struct RISCVCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -495,8 +498,6 @@ void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong); #define TB_FLAGS_MSTATUS_FS MSTATUS_FS #define TB_FLAGS_MSTATUS_VS MSTATUS_VS -typedef CPURISCVState CPUArchState; -typedef RISCVCPU ArchCPU; #include "exec/cpu-all.h" FIELD(TB_FLAGS, MEM_IDX, 0, 3) diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 746335bfd6..1c60fb2e80 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -466,9 +466,13 @@ bool riscv_cpu_vector_enabled(CPURISCVState *env) void riscv_cpu_swap_hypervisor_regs(CPURISCVState *env) { - uint64_t mstatus_mask = MSTATUS_MXR | MSTATUS_SUM | MSTATUS_FS | + uint64_t mstatus_mask = MSTATUS_MXR | MSTATUS_SUM | MSTATUS_SPP | MSTATUS_SPIE | MSTATUS_SIE | MSTATUS64_UXL | MSTATUS_VS; + + if (riscv_has_ext(env, RVF)) { + mstatus_mask |= MSTATUS_FS; + } bool current_virt = riscv_cpu_virt_enabled(env); g_assert(riscv_has_ext(env, RVH)); diff --git a/target/riscv/csr.c b/target/riscv/csr.c index a938760a3f..0606cd0ea8 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -23,6 +23,7 @@ #include "cpu.h" #include "qemu/main-loop.h" #include "exec/exec-all.h" +#include "sysemu/cpu-timers.h" /* CSR function table public API */ void riscv_get_csr_ops(int csrno, riscv_csr_operations *ops) @@ -39,7 +40,8 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops) static RISCVException fs(CPURISCVState *env, int csrno) { #if !defined(CONFIG_USER_ONLY) - if (!env->debugger && !riscv_cpu_fp_enabled(env)) { + if (!env->debugger && !riscv_cpu_fp_enabled(env) && + !RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) { return RISCV_EXCP_ILLEGAL_INST; } #endif @@ -302,7 +304,9 @@ static RISCVException write_fflags(CPURISCVState *env, int csrno, target_ulong val) { #if !defined(CONFIG_USER_ONLY) - env->mstatus |= MSTATUS_FS; + if (riscv_has_ext(env, RVF)) { + env->mstatus |= MSTATUS_FS; + } #endif riscv_cpu_set_fflags(env, 
val & (FSR_AEXC >> FSR_AEXC_SHIFT)); return RISCV_EXCP_NONE; @@ -319,7 +323,9 @@ static RISCVException write_frm(CPURISCVState *env, int csrno, target_ulong val) { #if !defined(CONFIG_USER_ONLY) - env->mstatus |= MSTATUS_FS; + if (riscv_has_ext(env, RVF)) { + env->mstatus |= MSTATUS_FS; + } #endif env->frm = val & (FSR_RD >> FSR_RD_SHIFT); return RISCV_EXCP_NONE; @@ -337,7 +343,9 @@ static RISCVException write_fcsr(CPURISCVState *env, int csrno, target_ulong val) { #if !defined(CONFIG_USER_ONLY) - env->mstatus |= MSTATUS_FS; + if (riscv_has_ext(env, RVF)) { + env->mstatus |= MSTATUS_FS; + } #endif env->frm = (val & FSR_RD) >> FSR_RD_SHIFT; riscv_cpu_set_fflags(env, (val & FSR_AEXC) >> FSR_AEXC_SHIFT); @@ -653,10 +661,14 @@ static RISCVException write_mstatus(CPURISCVState *env, int csrno, tlb_flush(env_cpu(env)); } mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE | - MSTATUS_SPP | MSTATUS_FS | MSTATUS_MPRV | MSTATUS_SUM | + MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM | MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR | MSTATUS_TW | MSTATUS_VS; + if (riscv_has_ext(env, RVF)) { + mask |= MSTATUS_FS; + } + if (xl != MXL_RV32 || env->debugger) { /* * RV32: MPV and GVA are not in mstatus. 
The current plan is to @@ -788,6 +800,10 @@ static RISCVException write_misa(CPURISCVState *env, int csrno, return RISCV_EXCP_NONE; } + if (!(val & RVF)) { + env->mstatus &= ~MSTATUS_FS; + } + /* flush translation cache */ tb_flush(env_cpu(env)); env->misa_ext = val; diff --git a/target/riscv/fpu_helper.c b/target/riscv/fpu_helper.c index 4a5982d594..5699c9517f 100644 --- a/target/riscv/fpu_helper.c +++ b/target/riscv/fpu_helper.c @@ -89,19 +89,21 @@ void helper_set_rod_rounding_mode(CPURISCVState *env) static uint64_t do_fmadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2, uint64_t rs3, int flags) { - float16 frs1 = check_nanbox_h(rs1); - float16 frs2 = check_nanbox_h(rs2); - float16 frs3 = check_nanbox_h(rs3); - return nanbox_h(float16_muladd(frs1, frs2, frs3, flags, &env->fp_status)); + float16 frs1 = check_nanbox_h(env, rs1); + float16 frs2 = check_nanbox_h(env, rs2); + float16 frs3 = check_nanbox_h(env, rs3); + return nanbox_h(env, float16_muladd(frs1, frs2, frs3, flags, + &env->fp_status)); } static uint64_t do_fmadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2, uint64_t rs3, int flags) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - float32 frs3 = check_nanbox_s(rs3); - return nanbox_s(float32_muladd(frs1, frs2, frs3, flags, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + float32 frs3 = check_nanbox_s(env, rs3); + return nanbox_s(env, float32_muladd(frs1, frs2, frs3, flags, + &env->fp_status)); } uint64_t helper_fmadd_s(CPURISCVState *env, uint64_t frs1, uint64_t frs2, @@ -183,124 +185,124 @@ uint64_t helper_fnmadd_h(CPURISCVState *env, uint64_t frs1, uint64_t frs2, uint64_t helper_fadd_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - return nanbox_s(float32_add(frs1, frs2, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + 
return nanbox_s(env, float32_add(frs1, frs2, &env->fp_status)); } uint64_t helper_fsub_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - return nanbox_s(float32_sub(frs1, frs2, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + return nanbox_s(env, float32_sub(frs1, frs2, &env->fp_status)); } uint64_t helper_fmul_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - return nanbox_s(float32_mul(frs1, frs2, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + return nanbox_s(env, float32_mul(frs1, frs2, &env->fp_status)); } uint64_t helper_fdiv_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - return nanbox_s(float32_div(frs1, frs2, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + return nanbox_s(env, float32_div(frs1, frs2, &env->fp_status)); } uint64_t helper_fmin_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - return nanbox_s(env->priv_ver < PRIV_VERSION_1_11_0 ? + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + return nanbox_s(env, env->priv_ver < PRIV_VERSION_1_11_0 ? float32_minnum(frs1, frs2, &env->fp_status) : float32_minimum_number(frs1, frs2, &env->fp_status)); } uint64_t helper_fmax_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); - return nanbox_s(env->priv_ver < PRIV_VERSION_1_11_0 ? + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); + return nanbox_s(env, env->priv_ver < PRIV_VERSION_1_11_0 ? 
float32_maxnum(frs1, frs2, &env->fp_status) : float32_maximum_number(frs1, frs2, &env->fp_status)); } uint64_t helper_fsqrt_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); - return nanbox_s(float32_sqrt(frs1, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + return nanbox_s(env, float32_sqrt(frs1, &env->fp_status)); } target_ulong helper_fle_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); return float32_le(frs1, frs2, &env->fp_status); } target_ulong helper_flt_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); return float32_lt(frs1, frs2, &env->fp_status); } target_ulong helper_feq_s(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float32 frs1 = check_nanbox_s(rs1); - float32 frs2 = check_nanbox_s(rs2); + float32 frs1 = check_nanbox_s(env, rs1); + float32 frs2 = check_nanbox_s(env, rs2); return float32_eq_quiet(frs1, frs2, &env->fp_status); } target_ulong helper_fcvt_w_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return float32_to_int32(frs1, &env->fp_status); } target_ulong helper_fcvt_wu_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return (int32_t)float32_to_uint32(frs1, &env->fp_status); } target_ulong helper_fcvt_l_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return float32_to_int64(frs1, &env->fp_status); } target_ulong helper_fcvt_lu_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return 
float32_to_uint64(frs1, &env->fp_status); } uint64_t helper_fcvt_s_w(CPURISCVState *env, target_ulong rs1) { - return nanbox_s(int32_to_float32((int32_t)rs1, &env->fp_status)); + return nanbox_s(env, int32_to_float32((int32_t)rs1, &env->fp_status)); } uint64_t helper_fcvt_s_wu(CPURISCVState *env, target_ulong rs1) { - return nanbox_s(uint32_to_float32((uint32_t)rs1, &env->fp_status)); + return nanbox_s(env, uint32_to_float32((uint32_t)rs1, &env->fp_status)); } uint64_t helper_fcvt_s_l(CPURISCVState *env, target_ulong rs1) { - return nanbox_s(int64_to_float32(rs1, &env->fp_status)); + return nanbox_s(env, int64_to_float32(rs1, &env->fp_status)); } uint64_t helper_fcvt_s_lu(CPURISCVState *env, target_ulong rs1) { - return nanbox_s(uint64_to_float32(rs1, &env->fp_status)); + return nanbox_s(env, uint64_to_float32(rs1, &env->fp_status)); } -target_ulong helper_fclass_s(uint64_t rs1) +target_ulong helper_fclass_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return fclass_s(frs1); } @@ -340,12 +342,12 @@ uint64_t helper_fmax_d(CPURISCVState *env, uint64_t frs1, uint64_t frs2) uint64_t helper_fcvt_s_d(CPURISCVState *env, uint64_t rs1) { - return nanbox_s(float64_to_float32(rs1, &env->fp_status)); + return nanbox_s(env, float64_to_float32(rs1, &env->fp_status)); } uint64_t helper_fcvt_d_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); + float32 frs1 = check_nanbox_s(env, rs1); return float32_to_float64(frs1, &env->fp_status); } @@ -416,146 +418,146 @@ target_ulong helper_fclass_d(uint64_t frs1) uint64_t helper_fadd_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float16 frs1 = check_nanbox_h(rs1); - float16 frs2 = check_nanbox_h(rs2); - return nanbox_h(float16_add(frs1, frs2, &env->fp_status)); + float16 frs1 = check_nanbox_h(env, rs1); + float16 frs2 = check_nanbox_h(env, rs2); + return nanbox_h(env, float16_add(frs1, frs2, &env->fp_status)); } uint64_t 
helper_fsub_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float16 frs1 = check_nanbox_h(rs1); - float16 frs2 = check_nanbox_h(rs2); - return nanbox_h(float16_sub(frs1, frs2, &env->fp_status)); + float16 frs1 = check_nanbox_h(env, rs1); + float16 frs2 = check_nanbox_h(env, rs2); + return nanbox_h(env, float16_sub(frs1, frs2, &env->fp_status)); } uint64_t helper_fmul_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float16 frs1 = check_nanbox_h(rs1); - float16 frs2 = check_nanbox_h(rs2); - return nanbox_h(float16_mul(frs1, frs2, &env->fp_status)); + float16 frs1 = check_nanbox_h(env, rs1); + float16 frs2 = check_nanbox_h(env, rs2); + return nanbox_h(env, float16_mul(frs1, frs2, &env->fp_status)); } uint64_t helper_fdiv_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float16 frs1 = check_nanbox_h(rs1); - float16 frs2 = check_nanbox_h(rs2); - return nanbox_h(float16_div(frs1, frs2, &env->fp_status)); + float16 frs1 = check_nanbox_h(env, rs1); + float16 frs2 = check_nanbox_h(env, rs2); + return nanbox_h(env, float16_div(frs1, frs2, &env->fp_status)); } uint64_t helper_fmin_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float16 frs1 = check_nanbox_h(rs1); - float16 frs2 = check_nanbox_h(rs2); - return nanbox_h(env->priv_ver < PRIV_VERSION_1_11_0 ? + float16 frs1 = check_nanbox_h(env, rs1); + float16 frs2 = check_nanbox_h(env, rs2); + return nanbox_h(env, env->priv_ver < PRIV_VERSION_1_11_0 ? float16_minnum(frs1, frs2, &env->fp_status) : float16_minimum_number(frs1, frs2, &env->fp_status)); } uint64_t helper_fmax_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float16 frs1 = check_nanbox_h(rs1); - float16 frs2 = check_nanbox_h(rs2); - return nanbox_h(env->priv_ver < PRIV_VERSION_1_11_0 ? + float16 frs1 = check_nanbox_h(env, rs1); + float16 frs2 = check_nanbox_h(env, rs2); + return nanbox_h(env, env->priv_ver < PRIV_VERSION_1_11_0 ? 
float16_maxnum(frs1, frs2, &env->fp_status) : float16_maximum_number(frs1, frs2, &env->fp_status)); } uint64_t helper_fsqrt_h(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(rs1); - return nanbox_h(float16_sqrt(frs1, &env->fp_status)); + float16 frs1 = check_nanbox_h(env, rs1); + return nanbox_h(env, float16_sqrt(frs1, &env->fp_status)); } target_ulong helper_fle_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float16 frs1 = check_nanbox_h(rs1); - float16 frs2 = check_nanbox_h(rs2); + float16 frs1 = check_nanbox_h(env, rs1); + float16 frs2 = check_nanbox_h(env, rs2); return float16_le(frs1, frs2, &env->fp_status); } target_ulong helper_flt_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float16 frs1 = check_nanbox_h(rs1); - float16 frs2 = check_nanbox_h(rs2); + float16 frs1 = check_nanbox_h(env, rs1); + float16 frs2 = check_nanbox_h(env, rs2); return float16_lt(frs1, frs2, &env->fp_status); } target_ulong helper_feq_h(CPURISCVState *env, uint64_t rs1, uint64_t rs2) { - float16 frs1 = check_nanbox_h(rs1); - float16 frs2 = check_nanbox_h(rs2); + float16 frs1 = check_nanbox_h(env, rs1); + float16 frs2 = check_nanbox_h(env, rs2); return float16_eq_quiet(frs1, frs2, &env->fp_status); } -target_ulong helper_fclass_h(uint64_t rs1) +target_ulong helper_fclass_h(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(rs1); + float16 frs1 = check_nanbox_h(env, rs1); return fclass_h(frs1); } target_ulong helper_fcvt_w_h(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(rs1); + float16 frs1 = check_nanbox_h(env, rs1); return float16_to_int32(frs1, &env->fp_status); } target_ulong helper_fcvt_wu_h(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(rs1); + float16 frs1 = check_nanbox_h(env, rs1); return (int32_t)float16_to_uint32(frs1, &env->fp_status); } target_ulong helper_fcvt_l_h(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(rs1); + float16 frs1 = check_nanbox_h(env, rs1); 
return float16_to_int64(frs1, &env->fp_status); } target_ulong helper_fcvt_lu_h(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(rs1); + float16 frs1 = check_nanbox_h(env, rs1); return float16_to_uint64(frs1, &env->fp_status); } uint64_t helper_fcvt_h_w(CPURISCVState *env, target_ulong rs1) { - return nanbox_h(int32_to_float16((int32_t)rs1, &env->fp_status)); + return nanbox_h(env, int32_to_float16((int32_t)rs1, &env->fp_status)); } uint64_t helper_fcvt_h_wu(CPURISCVState *env, target_ulong rs1) { - return nanbox_h(uint32_to_float16((uint32_t)rs1, &env->fp_status)); + return nanbox_h(env, uint32_to_float16((uint32_t)rs1, &env->fp_status)); } uint64_t helper_fcvt_h_l(CPURISCVState *env, target_ulong rs1) { - return nanbox_h(int64_to_float16(rs1, &env->fp_status)); + return nanbox_h(env, int64_to_float16(rs1, &env->fp_status)); } uint64_t helper_fcvt_h_lu(CPURISCVState *env, target_ulong rs1) { - return nanbox_h(uint64_to_float16(rs1, &env->fp_status)); + return nanbox_h(env, uint64_to_float16(rs1, &env->fp_status)); } uint64_t helper_fcvt_h_s(CPURISCVState *env, uint64_t rs1) { - float32 frs1 = check_nanbox_s(rs1); - return nanbox_h(float32_to_float16(frs1, true, &env->fp_status)); + float32 frs1 = check_nanbox_s(env, rs1); + return nanbox_h(env, float32_to_float16(frs1, true, &env->fp_status)); } uint64_t helper_fcvt_s_h(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(rs1); - return nanbox_s(float16_to_float32(frs1, true, &env->fp_status)); + float16 frs1 = check_nanbox_h(env, rs1); + return nanbox_s(env, float16_to_float32(frs1, true, &env->fp_status)); } uint64_t helper_fcvt_h_d(CPURISCVState *env, uint64_t rs1) { - return nanbox_h(float64_to_float16(rs1, true, &env->fp_status)); + return nanbox_h(env, float64_to_float16(rs1, true, &env->fp_status)); } uint64_t helper_fcvt_d_h(CPURISCVState *env, uint64_t rs1) { - float16 frs1 = check_nanbox_h(rs1); + float16 frs1 = check_nanbox_h(env, rs1); return float16_to_float64(frs1, 
true, &env->fp_status); } diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 72cc2582f4..26bbab2fab 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -38,7 +38,7 @@ DEF_HELPER_FLAGS_2(fcvt_s_w, TCG_CALL_NO_RWG, i64, env, tl) DEF_HELPER_FLAGS_2(fcvt_s_wu, TCG_CALL_NO_RWG, i64, env, tl) DEF_HELPER_FLAGS_2(fcvt_s_l, TCG_CALL_NO_RWG, i64, env, tl) DEF_HELPER_FLAGS_2(fcvt_s_lu, TCG_CALL_NO_RWG, i64, env, tl) -DEF_HELPER_FLAGS_1(fclass_s, TCG_CALL_NO_RWG_SE, tl, i64) +DEF_HELPER_FLAGS_2(fclass_s, TCG_CALL_NO_RWG_SE, tl, env, i64) /* Floating Point - Double Precision */ DEF_HELPER_FLAGS_3(fadd_d, TCG_CALL_NO_RWG, i64, env, i64, i64) @@ -90,7 +90,7 @@ DEF_HELPER_FLAGS_2(fcvt_h_w, TCG_CALL_NO_RWG, i64, env, tl) DEF_HELPER_FLAGS_2(fcvt_h_wu, TCG_CALL_NO_RWG, i64, env, tl) DEF_HELPER_FLAGS_2(fcvt_h_l, TCG_CALL_NO_RWG, i64, env, tl) DEF_HELPER_FLAGS_2(fcvt_h_lu, TCG_CALL_NO_RWG, i64, env, tl) -DEF_HELPER_FLAGS_1(fclass_h, TCG_CALL_NO_RWG_SE, tl, i64) +DEF_HELPER_FLAGS_2(fclass_h, TCG_CALL_NO_RWG_SE, tl, env, i64) /* Special functions */ DEF_HELPER_2(csrr, tl, env, int) diff --git a/target/riscv/insn_trans/trans_rvb.c.inc b/target/riscv/insn_trans/trans_rvb.c.inc index f9bd3b7ec4..e8519a6d69 100644 --- a/target/riscv/insn_trans/trans_rvb.c.inc +++ b/target/riscv/insn_trans/trans_rvb.c.inc @@ -19,25 +19,25 @@ */ #define REQUIRE_ZBA(ctx) do { \ - if (ctx->cfg_ptr->ext_zba) { \ + if (!ctx->cfg_ptr->ext_zba) { \ return false; \ } \ } while (0) #define REQUIRE_ZBB(ctx) do { \ - if (ctx->cfg_ptr->ext_zbb) { \ + if (!ctx->cfg_ptr->ext_zbb) { \ return false; \ } \ } while (0) #define REQUIRE_ZBC(ctx) do { \ - if (ctx->cfg_ptr->ext_zbc) { \ + if (!ctx->cfg_ptr->ext_zbc) { \ return false; \ } \ } while (0) #define REQUIRE_ZBS(ctx) do { \ - if (ctx->cfg_ptr->ext_zbs) { \ + if (!ctx->cfg_ptr->ext_zbs) { \ return false; \ } \ } while (0) diff --git a/target/riscv/insn_trans/trans_rvd.c.inc b/target/riscv/insn_trans/trans_rvd.c.inc index 091ed3a8ad..1397c1ce1c 
100644 --- a/target/riscv/insn_trans/trans_rvd.c.inc +++ b/target/riscv/insn_trans/trans_rvd.c.inc @@ -18,6 +18,19 @@ * this program. If not, see <http://www.gnu.org/licenses/>. */ +#define REQUIRE_ZDINX_OR_D(ctx) do { \ + if (!ctx->cfg_ptr->ext_zdinx) { \ + REQUIRE_EXT(ctx, RVD); \ + } \ +} while (0) + +#define REQUIRE_EVEN(ctx, reg) do { \ + if (ctx->cfg_ptr->ext_zdinx && (get_xl(ctx) == MXL_RV32) && \ + ((reg) & 0x1)) { \ + return false; \ + } \ +} while (0) + static bool trans_fld(DisasContext *ctx, arg_fld *a) { TCGv addr; @@ -47,10 +60,17 @@ static bool trans_fsd(DisasContext *ctx, arg_fsd *a) static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2 | a->rs3); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_d(ctx, a->rs3); + gen_set_rm(ctx, a->rm); - gen_helper_fmadd_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fmadd_d(dest, cpu_env, src1, src2, src3); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -58,10 +78,17 @@ static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a) static bool trans_fmsub_d(DisasContext *ctx, arg_fmsub_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2 | a->rs3); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_d(ctx, a->rs3); + gen_set_rm(ctx, a->rm); - gen_helper_fmsub_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fmsub_d(dest, cpu_env, src1, src2, src3); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -69,10 +96,17 @@ static bool trans_fmsub_d(DisasContext *ctx, arg_fmsub_d *a) static bool 
trans_fnmsub_d(DisasContext *ctx, arg_fnmsub_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2 | a->rs3); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_d(ctx, a->rs3); + gen_set_rm(ctx, a->rm); - gen_helper_fnmsub_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fnmsub_d(dest, cpu_env, src1, src2, src3); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -80,10 +114,17 @@ static bool trans_fnmsub_d(DisasContext *ctx, arg_fnmsub_d *a) static bool trans_fnmadd_d(DisasContext *ctx, arg_fnmadd_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2 | a->rs3); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_d(ctx, a->rs3); + gen_set_rm(ctx, a->rm); - gen_helper_fnmadd_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fnmadd_d(dest, cpu_env, src1, src2, src3); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -91,12 +132,16 @@ static bool trans_fnmadd_d(DisasContext *ctx, arg_fnmadd_d *a) static bool trans_fadd_d(DisasContext *ctx, arg_fadd_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2); - gen_set_rm(ctx, a->rm); - gen_helper_fadd_d(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); + gen_set_rm(ctx, a->rm); + gen_helper_fadd_d(dest, cpu_env, src1, src2); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -104,12 +149,16 @@ static bool trans_fadd_d(DisasContext *ctx, arg_fadd_d *a) 
static bool trans_fsub_d(DisasContext *ctx, arg_fsub_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2); - gen_set_rm(ctx, a->rm); - gen_helper_fsub_d(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); + gen_set_rm(ctx, a->rm); + gen_helper_fsub_d(dest, cpu_env, src1, src2); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -117,12 +166,16 @@ static bool trans_fsub_d(DisasContext *ctx, arg_fsub_d *a) static bool trans_fmul_d(DisasContext *ctx, arg_fmul_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2); - gen_set_rm(ctx, a->rm); - gen_helper_fmul_d(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); + gen_set_rm(ctx, a->rm); + gen_helper_fmul_d(dest, cpu_env, src1, src2); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -130,12 +183,16 @@ static bool trans_fmul_d(DisasContext *ctx, arg_fmul_d *a) static bool trans_fdiv_d(DisasContext *ctx, arg_fdiv_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2); - gen_set_rm(ctx, a->rm); - gen_helper_fdiv_d(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); + gen_set_rm(ctx, a->rm); + gen_helper_fdiv_d(dest, cpu_env, src1, src2); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -143,23 +200,34 @@ static bool trans_fdiv_d(DisasContext *ctx, arg_fdiv_d *a) static bool trans_fsqrt_d(DisasContext *ctx, arg_fsqrt_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + 
REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1); - gen_set_rm(ctx, a->rm); - gen_helper_fsqrt_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + gen_set_rm(ctx, a->rm); + gen_helper_fsqrt_d(dest, cpu_env, src1); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a) { + REQUIRE_FPU; + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); if (a->rs1 == a->rs2) { /* FMOV */ - tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]); + dest = get_fpr_d(ctx, a->rs1); } else { - tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2], - cpu_fpr[a->rs1], 0, 63); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); + tcg_gen_deposit_i64(dest, src2, src1, 0, 63); } + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -167,15 +235,22 @@ static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a) static bool trans_fsgnjn_d(DisasContext *ctx, arg_fsgnjn_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + if (a->rs1 == a->rs2) { /* FNEG */ - tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT64_MIN); + tcg_gen_xori_i64(dest, src1, INT64_MIN); } else { + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); TCGv_i64 t0 = tcg_temp_new_i64(); - tcg_gen_not_i64(t0, cpu_fpr[a->rs2]); - tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 63); + tcg_gen_not_i64(t0, src2); + tcg_gen_deposit_i64(dest, t0, src1, 0, 63); tcg_temp_free_i64(t0); } + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -183,15 +258,22 @@ static bool trans_fsgnjn_d(DisasContext *ctx, arg_fsgnjn_d *a) static bool trans_fsgnjx_d(DisasContext *ctx, arg_fsgnjx_d *a) { REQUIRE_FPU; - 
REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + if (a->rs1 == a->rs2) { /* FABS */ - tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT64_MIN); + tcg_gen_andi_i64(dest, src1, ~INT64_MIN); } else { + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); TCGv_i64 t0 = tcg_temp_new_i64(); - tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT64_MIN); - tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0); + tcg_gen_andi_i64(t0, src2, INT64_MIN); + tcg_gen_xor_i64(dest, src1, t0); tcg_temp_free_i64(t0); } + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -199,11 +281,15 @@ static bool trans_fsgnjx_d(DisasContext *ctx, arg_fsgnjx_d *a) static bool trans_fmin_d(DisasContext *ctx, arg_fmin_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2); - gen_helper_fmin_d(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); + gen_helper_fmin_d(dest, cpu_env, src1, src2); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -211,11 +297,15 @@ static bool trans_fmin_d(DisasContext *ctx, arg_fmin_d *a) static bool trans_fmax_d(DisasContext *ctx, arg_fmax_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd | a->rs1 | a->rs2); - gen_helper_fmax_d(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); + gen_helper_fmax_d(dest, cpu_env, src1, src2); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -223,11 +313,15 @@ static bool trans_fmax_d(DisasContext *ctx, arg_fmax_d *a) static bool trans_fcvt_s_d(DisasContext *ctx, arg_fcvt_s_d *a) { 
REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rs1); - gen_set_rm(ctx, a->rm); - gen_helper_fcvt_s_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + gen_set_rm(ctx, a->rm); + gen_helper_fcvt_s_d(dest, cpu_env, src1); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -235,11 +329,15 @@ static bool trans_fcvt_s_d(DisasContext *ctx, arg_fcvt_s_d *a) static bool trans_fcvt_d_s(DisasContext *ctx, arg_fcvt_d_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd); - gen_set_rm(ctx, a->rm); - gen_helper_fcvt_d_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + gen_set_rm(ctx, a->rm); + gen_helper_fcvt_d_s(dest, cpu_env, src1); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -247,11 +345,14 @@ static bool trans_fcvt_d_s(DisasContext *ctx, arg_fcvt_d_s *a) static bool trans_feq_d(DisasContext *ctx, arg_feq_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rs1 | a->rs2); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); - gen_helper_feq_d(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_feq_d(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -259,11 +360,14 @@ static bool trans_feq_d(DisasContext *ctx, arg_feq_d *a) static bool trans_flt_d(DisasContext *ctx, arg_flt_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rs1 | a->rs2); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); - gen_helper_flt_d(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_flt_d(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); 
return true; } @@ -271,11 +375,14 @@ static bool trans_flt_d(DisasContext *ctx, arg_flt_d *a) static bool trans_fle_d(DisasContext *ctx, arg_fle_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rs1 | a->rs2); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_d(ctx, a->rs2); - gen_helper_fle_d(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fle_d(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -283,11 +390,13 @@ static bool trans_fle_d(DisasContext *ctx, arg_fle_d *a) static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rs1); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); - gen_helper_fclass_d(dest, cpu_fpr[a->rs1]); + gen_helper_fclass_d(dest, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -295,12 +404,14 @@ static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a) static bool trans_fcvt_w_d(DisasContext *ctx, arg_fcvt_w_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rs1); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_w_d(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_w_d(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -308,12 +419,14 @@ static bool trans_fcvt_w_d(DisasContext *ctx, arg_fcvt_w_d *a) static bool trans_fcvt_wu_d(DisasContext *ctx, arg_fcvt_wu_d *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rs1); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_wu_d(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_wu_d(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -321,12 +434,15 @@ static bool 
trans_fcvt_wu_d(DisasContext *ctx, arg_fcvt_wu_d *a) static bool trans_fcvt_d_w(DisasContext *ctx, arg_fcvt_d_w *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_d_w(cpu_fpr[a->rd], cpu_env, src); + gen_helper_fcvt_d_w(dest, cpu_env, src); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; @@ -335,12 +451,15 @@ static bool trans_fcvt_d_w(DisasContext *ctx, arg_fcvt_d_w *a) static bool trans_fcvt_d_wu(DisasContext *ctx, arg_fcvt_d_wu *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_d_wu(cpu_fpr[a->rd], cpu_env, src); + gen_helper_fcvt_d_wu(dest, cpu_env, src); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; @@ -350,12 +469,14 @@ static bool trans_fcvt_l_d(DisasContext *ctx, arg_fcvt_l_d *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rs1); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_l_d(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_l_d(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -364,12 +485,14 @@ static bool trans_fcvt_lu_d(DisasContext *ctx, arg_fcvt_lu_d *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rs1); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_lu_d(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_lu_d(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -392,12 +515,15 @@ static bool trans_fcvt_d_l(DisasContext *ctx, arg_fcvt_d_l *a) { 
REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_d_l(cpu_fpr[a->rd], cpu_env, src); + gen_helper_fcvt_d_l(dest, cpu_env, src); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; @@ -407,12 +533,15 @@ static bool trans_fcvt_d_lu(DisasContext *ctx, arg_fcvt_d_lu *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZDINX_OR_D(ctx); + REQUIRE_EVEN(ctx, a->rd); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_d_lu(cpu_fpr[a->rd], cpu_env, src); + gen_helper_fcvt_d_lu(dest, cpu_env, src); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; diff --git a/target/riscv/insn_trans/trans_rvf.c.inc b/target/riscv/insn_trans/trans_rvf.c.inc index 0aac87f7db..a1d3eb52ad 100644 --- a/target/riscv/insn_trans/trans_rvf.c.inc +++ b/target/riscv/insn_trans/trans_rvf.c.inc @@ -20,7 +20,14 @@ #define REQUIRE_FPU do {\ if (ctx->mstatus_fs == 0) \ - return false; \ + if (!ctx->cfg_ptr->ext_zfinx) \ + return false; \ +} while (0) + +#define REQUIRE_ZFINX_OR_F(ctx) do {\ + if (!ctx->cfg_ptr->ext_zfinx) { \ + REQUIRE_EXT(ctx, RVF); \ + } \ } while (0) static bool trans_flw(DisasContext *ctx, arg_flw *a) @@ -55,10 +62,16 @@ static bool trans_fsw(DisasContext *ctx, arg_fsw *a) static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); + gen_set_rm(ctx, a->rm); - gen_helper_fmadd_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fmadd_s(dest, cpu_env, src1, src2, src3); + gen_set_fpr_hs(ctx, a->rd, 
dest); mark_fs_dirty(ctx); return true; } @@ -66,10 +79,16 @@ static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a) static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); + gen_set_rm(ctx, a->rm); - gen_helper_fmsub_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fmsub_s(dest, cpu_env, src1, src2, src3); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -77,10 +96,16 @@ static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a) static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); + gen_set_rm(ctx, a->rm); - gen_helper_fnmsub_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fnmsub_s(dest, cpu_env, src1, src2, src3); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -88,10 +113,16 @@ static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a) static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); + gen_set_rm(ctx, a->rm); - gen_helper_fnmadd_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fnmadd_s(dest, cpu_env, src1, src2, src3); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -99,11 +130,15 @@ static bool 
trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a) static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); gen_set_rm(ctx, a->rm); - gen_helper_fadd_s(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fadd_s(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -111,11 +146,15 @@ static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a) static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); gen_set_rm(ctx, a->rm); - gen_helper_fsub_s(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fsub_s(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -123,11 +162,15 @@ static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a) static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); gen_set_rm(ctx, a->rm); - gen_helper_fmul_s(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fmul_s(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -135,11 +178,15 @@ static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a) static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); 
gen_set_rm(ctx, a->rm); - gen_helper_fdiv_s(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fdiv_s(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -147,10 +194,14 @@ static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a) static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fsqrt_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]); + gen_helper_fsqrt_s(dest, cpu_env, src1); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -158,22 +209,37 @@ static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a) static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); if (a->rs1 == a->rs2) { /* FMOV */ - gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]); + if (!ctx->cfg_ptr->ext_zfinx) { + gen_check_nanbox_s(dest, src1); + } else { + tcg_gen_ext32s_i64(dest, src1); + } } else { /* FSGNJ */ - TCGv_i64 rs1 = tcg_temp_new_i64(); - TCGv_i64 rs2 = tcg_temp_new_i64(); - - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); - - /* This formulation retains the nanboxing of rs2. */ - tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31); - tcg_temp_free_i64(rs1); - tcg_temp_free_i64(rs2); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + + if (!ctx->cfg_ptr->ext_zfinx) { + TCGv_i64 rs1 = tcg_temp_new_i64(); + TCGv_i64 rs2 = tcg_temp_new_i64(); + gen_check_nanbox_s(rs1, src1); + gen_check_nanbox_s(rs2, src2); + + /* This formulation retains the nanboxing of rs2 in normal 'F'. 
*/ + tcg_gen_deposit_i64(dest, rs2, rs1, 0, 31); + + tcg_temp_free_i64(rs1); + tcg_temp_free_i64(rs2); + } else { + tcg_gen_deposit_i64(dest, src2, src1, 0, 31); + tcg_gen_ext32s_i64(dest, dest); + } } + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -183,16 +249,27 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a) TCGv_i64 rs1, rs2, mask; REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); - rs1 = tcg_temp_new_i64(); - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + rs1 = tcg_temp_new_i64(); + if (!ctx->cfg_ptr->ext_zfinx) { + gen_check_nanbox_s(rs1, src1); + } else { + tcg_gen_mov_i64(rs1, src1); + } if (a->rs1 == a->rs2) { /* FNEG */ - tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1)); + tcg_gen_xori_i64(dest, rs1, MAKE_64BIT_MASK(31, 1)); } else { + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); rs2 = tcg_temp_new_i64(); - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); + if (!ctx->cfg_ptr->ext_zfinx) { + gen_check_nanbox_s(rs2, src2); + } else { + tcg_gen_mov_i64(rs2, src2); + } /* * Replace bit 31 in rs1 with inverse in rs2. 
@@ -200,13 +277,17 @@ static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a) */ mask = tcg_constant_i64(~MAKE_64BIT_MASK(31, 1)); tcg_gen_nor_i64(rs2, rs2, mask); - tcg_gen_and_i64(rs1, mask, rs1); - tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2); + tcg_gen_and_i64(dest, mask, rs1); + tcg_gen_or_i64(dest, dest, rs2); tcg_temp_free_i64(rs2); } + /* signed-extended intead of nanboxing for result if enable zfinx */ + if (ctx->cfg_ptr->ext_zfinx) { + tcg_gen_ext32s_i64(dest, dest); + } + gen_set_fpr_hs(ctx, a->rd, dest); tcg_temp_free_i64(rs1); - mark_fs_dirty(ctx); return true; } @@ -216,28 +297,45 @@ static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a) TCGv_i64 rs1, rs2; REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); rs1 = tcg_temp_new_i64(); - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); + + if (!ctx->cfg_ptr->ext_zfinx) { + gen_check_nanbox_s(rs1, src1); + } else { + tcg_gen_mov_i64(rs1, src1); + } if (a->rs1 == a->rs2) { /* FABS */ - tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1)); + tcg_gen_andi_i64(dest, rs1, ~MAKE_64BIT_MASK(31, 1)); } else { + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); rs2 = tcg_temp_new_i64(); - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); + + if (!ctx->cfg_ptr->ext_zfinx) { + gen_check_nanbox_s(rs2, src2); + } else { + tcg_gen_mov_i64(rs2, src2); + } /* * Xor bit 31 in rs1 with that in rs2. * This formulation retains the nanboxing of rs1. 
*/ - tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1)); - tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2); + tcg_gen_andi_i64(dest, rs2, MAKE_64BIT_MASK(31, 1)); + tcg_gen_xor_i64(dest, rs1, dest); tcg_temp_free_i64(rs2); } + /* signed-extended intead of nanboxing for result if enable zfinx */ + if (ctx->cfg_ptr->ext_zfinx) { + tcg_gen_ext32s_i64(dest, dest); + } tcg_temp_free_i64(rs1); - + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -245,10 +343,14 @@ static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a) static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_fmin_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2]); + gen_helper_fmin_s(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -256,10 +358,14 @@ static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a) static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_fmax_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2]); + gen_helper_fmax_s(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -267,12 +373,13 @@ static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a) static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_w_s(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_w_s(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ 
-280,12 +387,13 @@ static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a) static bool trans_fcvt_wu_s(DisasContext *ctx, arg_fcvt_wu_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_wu_s(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_wu_s(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -294,14 +402,14 @@ static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a) { /* NOTE: This was FMV.X.S in an earlier version of the ISA spec! */ REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); - + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); #if defined(TARGET_RISCV64) - tcg_gen_ext32s_tl(dest, cpu_fpr[a->rs1]); + tcg_gen_ext32s_tl(dest, src1); #else - tcg_gen_extrl_i64_i32(dest, cpu_fpr[a->rs1]); + tcg_gen_extrl_i64_i32(dest, src1); #endif gen_set_gpr(ctx, a->rd, dest); @@ -311,11 +419,13 @@ static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a) static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_feq_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_feq_s(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -323,11 +433,13 @@ static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a) static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_flt_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_flt_s(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -335,11 +447,13 @@ static 
bool trans_flt_s(DisasContext *ctx, arg_flt_s *a) static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_fle_s(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fle_s(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -347,11 +461,12 @@ static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a) static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); - gen_helper_fclass_s(dest, cpu_fpr[a->rs1]); + gen_helper_fclass_s(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -359,13 +474,14 @@ static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a) static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_s_w(cpu_fpr[a->rd], cpu_env, src); - + gen_helper_fcvt_s_w(dest, cpu_env, src); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -373,13 +489,14 @@ static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a) static bool trans_fcvt_s_wu(DisasContext *ctx, arg_fcvt_s_wu *a) { REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_s_wu(cpu_fpr[a->rd], cpu_env, src); - + gen_helper_fcvt_s_wu(dest, cpu_env, src); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -388,13 +505,14 @@ static bool trans_fmv_w_x(DisasContext *ctx, arg_fmv_w_x *a) { /* NOTE: This was FMV.S.X in an earlier version of 
the ISA spec! */ REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); - tcg_gen_extu_tl_i64(cpu_fpr[a->rd], src); - gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]); - + tcg_gen_extu_tl_i64(dest, src); + gen_nanbox_s(dest, dest); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -403,12 +521,13 @@ static bool trans_fcvt_l_s(DisasContext *ctx, arg_fcvt_l_s *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_l_s(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_l_s(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -417,12 +536,13 @@ static bool trans_fcvt_lu_s(DisasContext *ctx, arg_fcvt_lu_s *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_lu_s(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_lu_s(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -431,13 +551,14 @@ static bool trans_fcvt_s_l(DisasContext *ctx, arg_fcvt_s_l *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_SIGN); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_s_l(cpu_fpr[a->rd], cpu_env, src); - + gen_helper_fcvt_s_l(dest, cpu_env, src); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -446,13 +567,14 @@ static bool trans_fcvt_s_lu(DisasContext *ctx, arg_fcvt_s_lu *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_EXT(ctx, RVF); + REQUIRE_ZFINX_OR_F(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv src = get_gpr(ctx, a->rs1, EXT_ZERO); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_s_lu(cpu_fpr[a->rd], 
cpu_env, src); - + gen_helper_fcvt_s_lu(dest, cpu_env, src); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } diff --git a/target/riscv/insn_trans/trans_rvzfh.c.inc b/target/riscv/insn_trans/trans_rvzfh.c.inc index 608c51da2c..5d07150cd0 100644 --- a/target/riscv/insn_trans/trans_rvzfh.c.inc +++ b/target/riscv/insn_trans/trans_rvzfh.c.inc @@ -22,12 +22,25 @@ } \ } while (0) +#define REQUIRE_ZHINX_OR_ZFH(ctx) do { \ + if (!ctx->cfg_ptr->ext_zhinx && !ctx->cfg_ptr->ext_zfh) { \ + return false; \ + } \ +} while (0) + #define REQUIRE_ZFH_OR_ZFHMIN(ctx) do { \ if (!(ctx->cfg_ptr->ext_zfh || ctx->cfg_ptr->ext_zfhmin)) { \ return false; \ } \ } while (0) +#define REQUIRE_ZFH_OR_ZFHMIN_OR_ZHINX_OR_ZHINXMIN(ctx) do { \ + if (!(ctx->cfg_ptr->ext_zfh || ctx->cfg_ptr->ext_zfhmin || \ + ctx->cfg_ptr->ext_zhinx || ctx->cfg_ptr->ext_zhinxmin)) { \ + return false; \ + } \ +} while (0) + static bool trans_flh(DisasContext *ctx, arg_flh *a) { TCGv_i64 dest; @@ -73,11 +86,16 @@ static bool trans_fsh(DisasContext *ctx, arg_fsh *a) static bool trans_fmadd_h(DisasContext *ctx, arg_fmadd_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); gen_set_rm(ctx, a->rm); - gen_helper_fmadd_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fmadd_h(dest, cpu_env, src1, src2, src3); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -85,11 +103,16 @@ static bool trans_fmadd_h(DisasContext *ctx, arg_fmadd_h *a) static bool trans_fmsub_h(DisasContext *ctx, arg_fmsub_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); gen_set_rm(ctx, a->rm); - 
gen_helper_fmsub_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fmsub_h(dest, cpu_env, src1, src2, src3); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -97,11 +120,16 @@ static bool trans_fmsub_h(DisasContext *ctx, arg_fmsub_h *a) static bool trans_fnmsub_h(DisasContext *ctx, arg_fnmsub_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); gen_set_rm(ctx, a->rm); - gen_helper_fnmsub_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fnmsub_h(dest, cpu_env, src1, src2, src3); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -109,11 +137,16 @@ static bool trans_fnmsub_h(DisasContext *ctx, arg_fnmsub_h *a) static bool trans_fnmadd_h(DisasContext *ctx, arg_fnmadd_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + TCGv_i64 src3 = get_fpr_hs(ctx, a->rs3); gen_set_rm(ctx, a->rm); - gen_helper_fnmadd_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2], cpu_fpr[a->rs3]); + gen_helper_fnmadd_h(dest, cpu_env, src1, src2, src3); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -121,11 +154,15 @@ static bool trans_fnmadd_h(DisasContext *ctx, arg_fnmadd_h *a) static bool trans_fadd_h(DisasContext *ctx, arg_fadd_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); gen_set_rm(ctx, a->rm); - gen_helper_fadd_h(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fadd_h(dest, cpu_env, src1, src2); + 
gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -133,11 +170,15 @@ static bool trans_fadd_h(DisasContext *ctx, arg_fadd_h *a) static bool trans_fsub_h(DisasContext *ctx, arg_fsub_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); gen_set_rm(ctx, a->rm); - gen_helper_fsub_h(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fsub_h(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -145,11 +186,15 @@ static bool trans_fsub_h(DisasContext *ctx, arg_fsub_h *a) static bool trans_fmul_h(DisasContext *ctx, arg_fmul_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); gen_set_rm(ctx, a->rm); - gen_helper_fmul_h(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fmul_h(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -157,11 +202,15 @@ static bool trans_fmul_h(DisasContext *ctx, arg_fmul_h *a) static bool trans_fdiv_h(DisasContext *ctx, arg_fdiv_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); gen_set_rm(ctx, a->rm); - gen_helper_fdiv_h(cpu_fpr[a->rd], cpu_env, - cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fdiv_h(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -169,10 +218,14 @@ static bool trans_fdiv_h(DisasContext *ctx, arg_fdiv_h *a) static bool trans_fsqrt_h(DisasContext *ctx, arg_fsqrt_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 
src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fsqrt_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]); + gen_helper_fsqrt_h(dest, cpu_env, src1); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -180,23 +233,37 @@ static bool trans_fsqrt_h(DisasContext *ctx, arg_fsqrt_h *a) static bool trans_fsgnj_h(DisasContext *ctx, arg_fsgnj_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); if (a->rs1 == a->rs2) { /* FMOV */ - gen_check_nanbox_h(cpu_fpr[a->rd], cpu_fpr[a->rs1]); + if (!ctx->cfg_ptr->ext_zfinx) { + gen_check_nanbox_h(dest, src1); + } else { + tcg_gen_ext16s_i64(dest, src1); + } } else { - TCGv_i64 rs1 = tcg_temp_new_i64(); - TCGv_i64 rs2 = tcg_temp_new_i64(); - - gen_check_nanbox_h(rs1, cpu_fpr[a->rs1]); - gen_check_nanbox_h(rs2, cpu_fpr[a->rs2]); - - /* This formulation retains the nanboxing of rs2. */ - tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 15); - tcg_temp_free_i64(rs1); - tcg_temp_free_i64(rs2); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + + if (!ctx->cfg_ptr->ext_zfinx) { + TCGv_i64 rs1 = tcg_temp_new_i64(); + TCGv_i64 rs2 = tcg_temp_new_i64(); + gen_check_nanbox_h(rs1, src1); + gen_check_nanbox_h(rs2, src2); + + /* This formulation retains the nanboxing of rs2 in normal 'Zfh'. 
*/ + tcg_gen_deposit_i64(dest, rs2, rs1, 0, 15); + + tcg_temp_free_i64(rs1); + tcg_temp_free_i64(rs2); + } else { + tcg_gen_deposit_i64(dest, src2, src1, 0, 15); + tcg_gen_ext16s_i64(dest, dest); + } } - + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -206,16 +273,29 @@ static bool trans_fsgnjn_h(DisasContext *ctx, arg_fsgnjn_h *a) TCGv_i64 rs1, rs2, mask; REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); rs1 = tcg_temp_new_i64(); - gen_check_nanbox_h(rs1, cpu_fpr[a->rs1]); + if (!ctx->cfg_ptr->ext_zfinx) { + gen_check_nanbox_h(rs1, src1); + } else { + tcg_gen_mov_i64(rs1, src1); + } if (a->rs1 == a->rs2) { /* FNEG */ - tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(15, 1)); + tcg_gen_xori_i64(dest, rs1, MAKE_64BIT_MASK(15, 1)); } else { + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); rs2 = tcg_temp_new_i64(); - gen_check_nanbox_h(rs2, cpu_fpr[a->rs2]); + + if (!ctx->cfg_ptr->ext_zfinx) { + gen_check_nanbox_h(rs2, src2); + } else { + tcg_gen_mov_i64(rs2, src2); + } /* * Replace bit 15 in rs1 with inverse in rs2. 
@@ -224,12 +304,17 @@ static bool trans_fsgnjn_h(DisasContext *ctx, arg_fsgnjn_h *a) mask = tcg_const_i64(~MAKE_64BIT_MASK(15, 1)); tcg_gen_not_i64(rs2, rs2); tcg_gen_andc_i64(rs2, rs2, mask); - tcg_gen_and_i64(rs1, mask, rs1); - tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2); + tcg_gen_and_i64(dest, mask, rs1); + tcg_gen_or_i64(dest, dest, rs2); tcg_temp_free_i64(mask); tcg_temp_free_i64(rs2); } + /* signed-extended intead of nanboxing for result if enable zfinx */ + if (ctx->cfg_ptr->ext_zfinx) { + tcg_gen_ext16s_i64(dest, dest); + } + tcg_temp_free_i64(rs1); mark_fs_dirty(ctx); return true; } @@ -239,27 +324,44 @@ static bool trans_fsgnjx_h(DisasContext *ctx, arg_fsgnjx_h *a) TCGv_i64 rs1, rs2; REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); rs1 = tcg_temp_new_i64(); - gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]); + if (!ctx->cfg_ptr->ext_zfinx) { + gen_check_nanbox_h(rs1, src1); + } else { + tcg_gen_mov_i64(rs1, src1); + } if (a->rs1 == a->rs2) { /* FABS */ - tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(15, 1)); + tcg_gen_andi_i64(dest, rs1, ~MAKE_64BIT_MASK(15, 1)); } else { + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); rs2 = tcg_temp_new_i64(); - gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]); + + if (!ctx->cfg_ptr->ext_zfinx) { + gen_check_nanbox_h(rs2, src2); + } else { + tcg_gen_mov_i64(rs2, src2); + } /* * Xor bit 15 in rs1 with that in rs2. * This formulation retains the nanboxing of rs1. 
*/ - tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(15, 1)); - tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2); + tcg_gen_andi_i64(dest, rs2, MAKE_64BIT_MASK(15, 1)); + tcg_gen_xor_i64(dest, rs1, dest); tcg_temp_free_i64(rs2); } - + /* signed-extended intead of nanboxing for result if enable zfinx */ + if (ctx->cfg_ptr->ext_zfinx) { + tcg_gen_ext16s_i64(dest, dest); + } + tcg_temp_free_i64(rs1); mark_fs_dirty(ctx); return true; } @@ -267,10 +369,14 @@ static bool trans_fsgnjx_h(DisasContext *ctx, arg_fsgnjx_h *a) static bool trans_fmin_h(DisasContext *ctx, arg_fmin_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_fmin_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2]); + gen_helper_fmin_h(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -278,10 +384,14 @@ static bool trans_fmin_h(DisasContext *ctx, arg_fmin_h *a) static bool trans_fmax_h(DisasContext *ctx, arg_fmax_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); - gen_helper_fmax_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1], - cpu_fpr[a->rs2]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); + + gen_helper_fmax_h(dest, cpu_env, src1, src2); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; } @@ -289,10 +399,14 @@ static bool trans_fmax_h(DisasContext *ctx, arg_fmax_h *a) static bool trans_fcvt_s_h(DisasContext *ctx, arg_fcvt_s_h *a) { REQUIRE_FPU; - REQUIRE_ZFH_OR_ZFHMIN(ctx); + REQUIRE_ZFH_OR_ZFHMIN_OR_ZHINX_OR_ZHINXMIN(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_s_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_s_h(dest, cpu_env, src1); + gen_set_fpr_hs(ctx, a->rd, dest); 
mark_fs_dirty(ctx); @@ -302,26 +416,32 @@ static bool trans_fcvt_s_h(DisasContext *ctx, arg_fcvt_s_h *a) static bool trans_fcvt_d_h(DisasContext *ctx, arg_fcvt_d_h *a) { REQUIRE_FPU; - REQUIRE_ZFH_OR_ZFHMIN(ctx); - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZFH_OR_ZFHMIN_OR_ZHINX_OR_ZHINXMIN(ctx); + REQUIRE_ZDINX_OR_D(ctx); + + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_d_h(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_d_h(dest, cpu_env, src1); + gen_set_fpr_d(ctx, a->rd, dest); mark_fs_dirty(ctx); - return true; } static bool trans_fcvt_h_s(DisasContext *ctx, arg_fcvt_h_s *a) { REQUIRE_FPU; - REQUIRE_ZFH_OR_ZFHMIN(ctx); + REQUIRE_ZFH_OR_ZFHMIN_OR_ZHINX_OR_ZHINXMIN(ctx); - gen_set_rm(ctx, a->rm); - gen_helper_fcvt_h_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + gen_set_rm(ctx, a->rm); + gen_helper_fcvt_h_s(dest, cpu_env, src1); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; @@ -330,12 +450,15 @@ static bool trans_fcvt_h_s(DisasContext *ctx, arg_fcvt_h_s *a) static bool trans_fcvt_h_d(DisasContext *ctx, arg_fcvt_h_d *a) { REQUIRE_FPU; - REQUIRE_ZFH_OR_ZFHMIN(ctx); - REQUIRE_EXT(ctx, RVD); + REQUIRE_ZFH_OR_ZFHMIN_OR_ZHINX_OR_ZHINXMIN(ctx); + REQUIRE_ZDINX_OR_D(ctx); - gen_set_rm(ctx, a->rm); - gen_helper_fcvt_h_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]); + TCGv_i64 dest = dest_fpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_d(ctx, a->rs1); + gen_set_rm(ctx, a->rm); + gen_helper_fcvt_h_d(dest, cpu_env, src1); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; @@ -344,11 +467,13 @@ static bool trans_fcvt_h_d(DisasContext *ctx, arg_fcvt_h_d *a) static bool trans_feq_h(DisasContext *ctx, arg_feq_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = 
get_fpr_hs(ctx, a->rs2); - gen_helper_feq_h(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_feq_h(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -356,11 +481,13 @@ static bool trans_feq_h(DisasContext *ctx, arg_feq_h *a) static bool trans_flt_h(DisasContext *ctx, arg_flt_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_flt_h(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_flt_h(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; @@ -369,11 +496,13 @@ static bool trans_flt_h(DisasContext *ctx, arg_flt_h *a) static bool trans_fle_h(DisasContext *ctx, arg_fle_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); + TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_fle_h(dest, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]); + gen_helper_fle_h(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -381,11 +510,12 @@ static bool trans_fle_h(DisasContext *ctx, arg_fle_h *a) static bool trans_fclass_h(DisasContext *ctx, arg_fclass_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); - gen_helper_fclass_h(dest, cpu_fpr[a->rs1]); + gen_helper_fclass_h(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -393,12 +523,13 @@ static bool trans_fclass_h(DisasContext *ctx, arg_fclass_h *a) static bool trans_fcvt_w_h(DisasContext *ctx, arg_fcvt_w_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_w_h(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_w_h(dest, cpu_env, src1); gen_set_gpr(ctx, 
a->rd, dest); return true; } @@ -406,12 +537,13 @@ static bool trans_fcvt_w_h(DisasContext *ctx, arg_fcvt_w_h *a) static bool trans_fcvt_wu_h(DisasContext *ctx, arg_fcvt_wu_h *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_wu_h(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_wu_h(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -419,12 +551,14 @@ static bool trans_fcvt_wu_h(DisasContext *ctx, arg_fcvt_wu_h *a) static bool trans_fcvt_h_w(DisasContext *ctx, arg_fcvt_h_w *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv t0 = get_gpr(ctx, a->rs1, EXT_SIGN); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_h_w(cpu_fpr[a->rd], cpu_env, t0); + gen_helper_fcvt_h_w(dest, cpu_env, t0); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; @@ -433,12 +567,14 @@ static bool trans_fcvt_h_w(DisasContext *ctx, arg_fcvt_h_w *a) static bool trans_fcvt_h_wu(DisasContext *ctx, arg_fcvt_h_wu *a) { REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv t0 = get_gpr(ctx, a->rs1, EXT_SIGN); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_h_wu(cpu_fpr[a->rd], cpu_env, t0); + gen_helper_fcvt_h_wu(dest, cpu_env, t0); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; @@ -482,12 +618,13 @@ static bool trans_fcvt_l_h(DisasContext *ctx, arg_fcvt_l_h *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_l_h(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_l_h(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -496,12 +633,13 @@ static bool trans_fcvt_lu_h(DisasContext *ctx, arg_fcvt_lu_h *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - 
REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); TCGv dest = dest_gpr(ctx, a->rd); + TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_lu_h(dest, cpu_env, cpu_fpr[a->rs1]); + gen_helper_fcvt_lu_h(dest, cpu_env, src1); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -510,12 +648,14 @@ static bool trans_fcvt_h_l(DisasContext *ctx, arg_fcvt_h_l *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv t0 = get_gpr(ctx, a->rs1, EXT_SIGN); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_h_l(cpu_fpr[a->rd], cpu_env, t0); + gen_helper_fcvt_h_l(dest, cpu_env, t0); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; @@ -525,12 +665,14 @@ static bool trans_fcvt_h_lu(DisasContext *ctx, arg_fcvt_h_lu *a) { REQUIRE_64BIT(ctx); REQUIRE_FPU; - REQUIRE_ZFH(ctx); + REQUIRE_ZHINX_OR_ZFH(ctx); + TCGv_i64 dest = dest_fpr(ctx, a->rd); TCGv t0 = get_gpr(ctx, a->rs1, EXT_SIGN); gen_set_rm(ctx, a->rm); - gen_helper_fcvt_h_lu(cpu_fpr[a->rd], cpu_env, t0); + gen_helper_fcvt_h_lu(dest, cpu_env, t0); + gen_set_fpr_hs(ctx, a->rd, dest); mark_fs_dirty(ctx); return true; diff --git a/target/riscv/internals.h b/target/riscv/internals.h index 065e8162a2..dbb322bfa7 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -46,13 +46,23 @@ enum { RISCV_FRM_ROD = 8, /* Round to Odd */ }; -static inline uint64_t nanbox_s(float32 f) +static inline uint64_t nanbox_s(CPURISCVState *env, float32 f) { - return f | MAKE_64BIT_MASK(32, 32); + /* the value is sign-extended instead of NaN-boxing for zfinx */ + if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) { + return (int32_t)f; + } else { + return f | MAKE_64BIT_MASK(32, 32); + } } -static inline float32 check_nanbox_s(uint64_t f) +static inline float32 check_nanbox_s(CPURISCVState *env, uint64_t f) { + /* Disable NaN-boxing check when enable zfinx */ + if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) { + return (uint32_t)f; + } + 
uint64_t mask = MAKE_64BIT_MASK(32, 32); if (likely((f & mask) == mask)) { @@ -62,13 +72,23 @@ static inline float32 check_nanbox_s(uint64_t f) } } -static inline uint64_t nanbox_h(float16 f) +static inline uint64_t nanbox_h(CPURISCVState *env, float16 f) { - return f | MAKE_64BIT_MASK(16, 48); + /* the value is sign-extended instead of NaN-boxing for zfinx */ + if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) { + return (int16_t)f; + } else { + return f | MAKE_64BIT_MASK(16, 48); + } } -static inline float16 check_nanbox_h(uint64_t f) +static inline float16 check_nanbox_h(CPURISCVState *env, uint64_t f) { + /* Disable nanbox check when enable zfinx */ + if (RISCV_CPU(env_cpu(env))->cfg.ext_zfinx) { + return (uint16_t)f; + } + uint64_t mask = MAKE_64BIT_MASK(16, 48); if (likely((f & mask) == mask)) { diff --git a/target/riscv/pmp.h b/target/riscv/pmp.h index a9a0b363a7..fcb6b7c467 100644 --- a/target/riscv/pmp.h +++ b/target/riscv/pmp.h @@ -22,6 +22,8 @@ #ifndef RISCV_PMP_H #define RISCV_PMP_H +#include "cpu.h" + typedef enum { PMP_READ = 1 << 0, PMP_WRITE = 1 << 1, diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 84dbfa6340..fac998a6b5 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -101,6 +101,9 @@ typedef struct DisasContext { TCGv zero; /* Space for 3 operands plus 1 extra for address computation. 
*/ TCGv temp[4]; + /* Space for 4 operands(1 dest and <=3 src) for float point computation */ + TCGv_i64 ftemp[4]; + uint8_t nftemp; /* PointerMasking extension */ bool pm_mask_enabled; bool pm_base_enabled; @@ -380,6 +383,138 @@ static void gen_set_gpr128(DisasContext *ctx, int reg_num, TCGv rl, TCGv rh) } } +static TCGv_i64 ftemp_new(DisasContext *ctx) +{ + assert(ctx->nftemp < ARRAY_SIZE(ctx->ftemp)); + return ctx->ftemp[ctx->nftemp++] = tcg_temp_new_i64(); +} + +static TCGv_i64 get_fpr_hs(DisasContext *ctx, int reg_num) +{ + if (!ctx->cfg_ptr->ext_zfinx) { + return cpu_fpr[reg_num]; + } + + if (reg_num == 0) { + return tcg_constant_i64(0); + } + switch (get_xl(ctx)) { + case MXL_RV32: +#ifdef TARGET_RISCV32 + { + TCGv_i64 t = ftemp_new(ctx); + tcg_gen_ext_i32_i64(t, cpu_gpr[reg_num]); + return t; + } +#else + /* fall through */ + case MXL_RV64: + return cpu_gpr[reg_num]; +#endif + default: + g_assert_not_reached(); + } +} + +static TCGv_i64 get_fpr_d(DisasContext *ctx, int reg_num) +{ + if (!ctx->cfg_ptr->ext_zfinx) { + return cpu_fpr[reg_num]; + } + + if (reg_num == 0) { + return tcg_constant_i64(0); + } + switch (get_xl(ctx)) { + case MXL_RV32: + { + TCGv_i64 t = ftemp_new(ctx); + tcg_gen_concat_tl_i64(t, cpu_gpr[reg_num], cpu_gpr[reg_num + 1]); + return t; + } +#ifdef TARGET_RISCV64 + case MXL_RV64: + return cpu_gpr[reg_num]; +#endif + default: + g_assert_not_reached(); + } +} + +static TCGv_i64 dest_fpr(DisasContext *ctx, int reg_num) +{ + if (!ctx->cfg_ptr->ext_zfinx) { + return cpu_fpr[reg_num]; + } + + if (reg_num == 0) { + return ftemp_new(ctx); + } + + switch (get_xl(ctx)) { + case MXL_RV32: + return ftemp_new(ctx); +#ifdef TARGET_RISCV64 + case MXL_RV64: + return cpu_gpr[reg_num]; +#endif + default: + g_assert_not_reached(); + } +} + +/* assume t is nanboxing (for normal) or sign-extended (for zfinx) */ +static void gen_set_fpr_hs(DisasContext *ctx, int reg_num, TCGv_i64 t) +{ + if (!ctx->cfg_ptr->ext_zfinx) { + tcg_gen_mov_i64(cpu_fpr[reg_num], t); + 
return; + } + if (reg_num != 0) { + switch (get_xl(ctx)) { + case MXL_RV32: +#ifdef TARGET_RISCV32 + tcg_gen_extrl_i64_i32(cpu_gpr[reg_num], t); + break; +#else + /* fall through */ + case MXL_RV64: + tcg_gen_mov_i64(cpu_gpr[reg_num], t); + break; +#endif + default: + g_assert_not_reached(); + } + } +} + +static void gen_set_fpr_d(DisasContext *ctx, int reg_num, TCGv_i64 t) +{ + if (!ctx->cfg_ptr->ext_zfinx) { + tcg_gen_mov_i64(cpu_fpr[reg_num], t); + return; + } + + if (reg_num != 0) { + switch (get_xl(ctx)) { + case MXL_RV32: +#ifdef TARGET_RISCV32 + tcg_gen_extr_i64_i32(cpu_gpr[reg_num], cpu_gpr[reg_num + 1], t); + break; +#else + tcg_gen_ext32s_i64(cpu_gpr[reg_num], t); + tcg_gen_sari_i64(cpu_gpr[reg_num + 1], t, 32); + break; + case MXL_RV64: + tcg_gen_mov_i64(cpu_gpr[reg_num], t); + break; +#endif + default: + g_assert_not_reached(); + } + } +} + static void gen_jal(DisasContext *ctx, int rd, target_ulong imm) { target_ulong next_pc; @@ -426,6 +561,10 @@ static void mark_fs_dirty(DisasContext *ctx) { TCGv tmp; + if (!has_ext(ctx, RVF)) { + return; + } + if (ctx->mstatus_fs != MSTATUS_FS) { /* Remember the state change for the rest of the TB. 
*/ ctx->mstatus_fs = MSTATUS_FS; @@ -951,6 +1090,8 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->cs = cs; ctx->ntemp = 0; memset(ctx->temp, 0, sizeof(ctx->temp)); + ctx->nftemp = 0; + memset(ctx->ftemp, 0, sizeof(ctx->ftemp)); ctx->pm_mask_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_MASK_ENABLED); ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED); ctx->zero = tcg_constant_tl(0); @@ -972,16 +1113,22 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) DisasContext *ctx = container_of(dcbase, DisasContext, base); CPURISCVState *env = cpu->env_ptr; uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next); + int i; ctx->ol = ctx->xl; decode_opc(env, ctx, opcode16); ctx->base.pc_next = ctx->pc_succ_insn; - for (int i = ctx->ntemp - 1; i >= 0; --i) { + for (i = ctx->ntemp - 1; i >= 0; --i) { tcg_temp_free(ctx->temp[i]); ctx->temp[i] = NULL; } ctx->ntemp = 0; + for (i = ctx->nftemp - 1; i >= 0; --i) { + tcg_temp_free_i64(ctx->ftemp[i]); + ctx->ftemp[i] = NULL; + } + ctx->nftemp = 0; if (ctx->base.is_jmp == DISAS_NEXT) { target_ulong page_start; diff --git a/target/rx/cpu-qom.h b/target/rx/cpu-qom.h index 7310558e0c..4533759d96 100644 --- a/target/rx/cpu-qom.h +++ b/target/rx/cpu-qom.h @@ -26,8 +26,7 @@ #define TYPE_RX62N_CPU RX_CPU_TYPE_NAME("rx62n") -OBJECT_DECLARE_TYPE(RXCPU, RXCPUClass, - RX_CPU) +OBJECT_DECLARE_CPU_TYPE(RXCPU, RXCPUClass, RX_CPU) /* * RXCPUClass: @@ -45,6 +44,4 @@ struct RXCPUClass { DeviceReset parent_reset; }; -#define CPUArchState struct CPURXState - #endif diff --git a/target/rx/cpu.h b/target/rx/cpu.h index 58adf9edf6..b4abd90ccd 100644 --- a/target/rx/cpu.h +++ b/target/rx/cpu.h @@ -65,7 +65,7 @@ enum { NUM_REGS = 16, }; -typedef struct CPURXState { +typedef struct CPUArchState { /* CPU registers */ uint32_t regs[NUM_REGS]; /* general registers */ uint32_t psw_o; /* O bit of status register */ @@ -105,7 +105,7 @@ typedef struct CPURXState { * 
* A RX CPU */ -struct RXCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -114,8 +114,6 @@ struct RXCPU { CPURXState env; }; -typedef RXCPU ArchCPU; - #define RX_CPU_TYPE_SUFFIX "-" TYPE_RX_CPU #define RX_CPU_TYPE_NAME(model) model RX_CPU_TYPE_SUFFIX #define CPU_RESOLVING_TYPE TYPE_RX_CPU diff --git a/target/s390x/cpu-qom.h b/target/s390x/cpu-qom.h index 9f3a0d86c5..00cae2b131 100644 --- a/target/s390x/cpu-qom.h +++ b/target/s390x/cpu-qom.h @@ -25,12 +25,13 @@ #define TYPE_S390_CPU "s390x-cpu" -OBJECT_DECLARE_TYPE(S390CPU, S390CPUClass, - S390_CPU) +OBJECT_DECLARE_CPU_TYPE(S390CPU, S390CPUClass, S390_CPU) typedef struct S390CPUModel S390CPUModel; typedef struct S390CPUDef S390CPUDef; +typedef struct CPUArchState CPUS390XState; + typedef enum cpu_reset_type { S390_CPU_RESET_NORMAL, S390_CPU_RESET_INITIAL, @@ -63,6 +64,4 @@ struct S390CPUClass { void (*reset)(CPUState *cpu, cpu_reset_type type); }; -typedef struct CPUS390XState CPUS390XState; - #endif diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index a75e559134..c49c8466e7 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -51,7 +51,7 @@ typedef struct PSW { uint64_t addr; } PSW; -struct CPUS390XState { +struct CPUArchState { uint64_t regs[16]; /* GP registers */ /* * The floating point registers are part of the vector registers. @@ -163,7 +163,7 @@ static inline uint64_t *get_freg(CPUS390XState *cs, int nr) * * An S/390 CPU. 
*/ -struct S390CPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -840,9 +840,6 @@ uint64_t s390_cpu_get_psw_mask(CPUS390XState *env); /* outside of target/s390x/ */ S390CPU *s390_cpu_addr2state(uint16_t cpu_addr); -typedef CPUS390XState CPUArchState; -typedef S390CPU ArchCPU; - #include "exec/cpu-all.h" #endif diff --git a/target/sh4/cpu-qom.h b/target/sh4/cpu-qom.h index 8903b4b9c7..d4192d1090 100644 --- a/target/sh4/cpu-qom.h +++ b/target/sh4/cpu-qom.h @@ -29,8 +29,7 @@ #define TYPE_SH7751R_CPU SUPERH_CPU_TYPE_NAME("sh7751r") #define TYPE_SH7785_CPU SUPERH_CPU_TYPE_NAME("sh7785") -OBJECT_DECLARE_TYPE(SuperHCPU, SuperHCPUClass, - SUPERH_CPU) +OBJECT_DECLARE_CPU_TYPE(SuperHCPU, SuperHCPUClass, SUPERH_CPU) /** * SuperHCPUClass: diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index fb9dd9db2f..c72a30edfd 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -130,7 +130,7 @@ typedef struct memory_content { struct memory_content *next; } memory_content; -typedef struct CPUSH4State { +typedef struct CPUArchState { uint32_t flags; /* general execution flags */ uint32_t gregs[24]; /* general registers */ float32 fregs[32]; /* floating point registers */ @@ -195,7 +195,7 @@ typedef struct CPUSH4State { * * A SuperH CPU. 
*/ -struct SuperHCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -264,9 +264,6 @@ static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch) } } -typedef CPUSH4State CPUArchState; -typedef SuperHCPU ArchCPU; - #include "exec/cpu-all.h" /* MMU control register */ diff --git a/target/sparc/cpu-qom.h b/target/sparc/cpu-qom.h index f33949aaee..86ed37d933 100644 --- a/target/sparc/cpu-qom.h +++ b/target/sparc/cpu-qom.h @@ -29,8 +29,7 @@ #define TYPE_SPARC_CPU "sparc-cpu" #endif -OBJECT_DECLARE_TYPE(SPARCCPU, SPARCCPUClass, - SPARC_CPU) +OBJECT_DECLARE_CPU_TYPE(SPARCCPU, SPARCCPUClass, SPARC_CPU) typedef struct sparc_def_t sparc_def_t; /** diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h index 5a7f1ed5d6..abb38db674 100644 --- a/target/sparc/cpu.h +++ b/target/sparc/cpu.h @@ -420,7 +420,7 @@ struct CPUTimer typedef struct CPUTimer CPUTimer; -typedef struct CPUSPARCState CPUSPARCState; +typedef struct CPUArchState CPUSPARCState; #if defined(TARGET_SPARC64) typedef union { uint64_t mmuregs[16]; @@ -439,7 +439,7 @@ typedef union { }; } SparcV9MMU; #endif -struct CPUSPARCState { +struct CPUArchState { target_ulong gregs[8]; /* general registers */ target_ulong *regwptr; /* pointer to current register window */ target_ulong pc; /* program counter */ @@ -556,7 +556,7 @@ struct CPUSPARCState { * * A SPARC CPU. 
*/ -struct SPARCCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -743,9 +743,6 @@ static inline int cpu_pil_allowed(CPUSPARCState *env1, int pil) #endif } -typedef CPUSPARCState CPUArchState; -typedef SPARCCPU ArchCPU; - #include "exec/cpu-all.h" #ifdef TARGET_SPARC64 diff --git a/target/tricore/cpu-qom.h b/target/tricore/cpu-qom.h index 59bfd01bbc..ee24e9fa76 100644 --- a/target/tricore/cpu-qom.h +++ b/target/tricore/cpu-qom.h @@ -24,8 +24,7 @@ #define TYPE_TRICORE_CPU "tricore-cpu" -OBJECT_DECLARE_TYPE(TriCoreCPU, TriCoreCPUClass, - TRICORE_CPU) +OBJECT_DECLARE_CPU_TYPE(TriCoreCPU, TriCoreCPUClass, TRICORE_CPU) struct TriCoreCPUClass { /*< private >*/ diff --git a/target/tricore/cpu.h b/target/tricore/cpu.h index c461387e71..108d6b8288 100644 --- a/target/tricore/cpu.h +++ b/target/tricore/cpu.h @@ -28,8 +28,7 @@ struct tricore_boot_info; typedef struct tricore_def_t tricore_def_t; -typedef struct CPUTriCoreState CPUTriCoreState; -struct CPUTriCoreState { +typedef struct CPUArchState { /* GPR Register */ uint32_t gpr_a[16]; uint32_t gpr_d[16]; @@ -189,7 +188,7 @@ struct CPUTriCoreState { const tricore_def_t *cpu_model; void *irq[8]; struct QEMUTimer *timer; /* Internal timer */ -}; +} CPUTriCoreState; /** * TriCoreCPU: @@ -197,7 +196,7 @@ struct CPUTriCoreState { * * A TriCore CPU. 
*/ -struct TriCoreCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -369,9 +368,6 @@ static inline int cpu_mmu_index(CPUTriCoreState *env, bool ifetch) return 0; } -typedef CPUTriCoreState CPUArchState; -typedef TriCoreCPU ArchCPU; - #include "exec/cpu-all.h" void cpu_state_reset(CPUTriCoreState *s); diff --git a/target/xtensa/cpu-qom.h b/target/xtensa/cpu-qom.h index 41d9859673..4fc35ee49b 100644 --- a/target/xtensa/cpu-qom.h +++ b/target/xtensa/cpu-qom.h @@ -34,8 +34,7 @@ #define TYPE_XTENSA_CPU "xtensa-cpu" -OBJECT_DECLARE_TYPE(XtensaCPU, XtensaCPUClass, - XTENSA_CPU) +OBJECT_DECLARE_CPU_TYPE(XtensaCPU, XtensaCPUClass, XTENSA_CPU) typedef struct XtensaConfig XtensaConfig; diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h index 02143f2f77..4515f682aa 100644 --- a/target/xtensa/cpu.h +++ b/target/xtensa/cpu.h @@ -306,7 +306,7 @@ typedef enum { INTTYPE_MAX } interrupt_type; -struct CPUXtensaState; +typedef struct CPUArchState CPUXtensaState; typedef struct xtensa_tlb_entry { uint32_t vaddr; @@ -344,7 +344,7 @@ typedef struct XtensaGdbRegmap { } XtensaGdbRegmap; typedef struct XtensaCcompareTimer { - struct CPUXtensaState *env; + CPUXtensaState *env; QEMUTimer *timer; } XtensaCcompareTimer; @@ -506,7 +506,7 @@ enum { }; #endif -typedef struct CPUXtensaState { +struct CPUArchState { const XtensaConfig *config; uint32_t regs[16]; uint32_t pc; @@ -545,7 +545,7 @@ typedef struct CPUXtensaState { /* Watchpoints for DBREAK registers */ struct CPUWatchpoint *cpu_watchpoint[MAX_NDBREAK]; -} CPUXtensaState; +}; /** * XtensaCPU: @@ -553,7 +553,7 @@ typedef struct CPUXtensaState { * * An Xtensa CPU. 
*/ -struct XtensaCPU { +struct ArchCPU { /*< private >*/ CPUState parent_obj; /*< public >*/ @@ -722,9 +722,6 @@ static inline int cpu_mmu_index(CPUXtensaState *env, bool ifetch) #define XTENSA_CSBASE_LBEG_OFF_MASK 0x00ff0000 #define XTENSA_CSBASE_LBEG_OFF_SHIFT 16 -typedef CPUXtensaState CPUArchState; -typedef XtensaCPU ArchCPU; - #include "exec/cpu-all.h" static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc, diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 876af589ce..485f685bd2 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -131,6 +131,9 @@ typedef enum { #define TCG_TARGET_HAS_andc_vec 1 #define TCG_TARGET_HAS_orc_vec 1 +#define TCG_TARGET_HAS_nand_vec 0 +#define TCG_TARGET_HAS_nor_vec 0 +#define TCG_TARGET_HAS_eqv_vec 0 #define TCG_TARGET_HAS_not_vec 1 #define TCG_TARGET_HAS_neg_vec 1 #define TCG_TARGET_HAS_abs_vec 1 diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 27c27a1f14..7e96495392 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -130,6 +130,9 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_andc_vec 1 #define TCG_TARGET_HAS_orc_vec 1 +#define TCG_TARGET_HAS_nand_vec 0 +#define TCG_TARGET_HAS_nor_vec 0 +#define TCG_TARGET_HAS_eqv_vec 0 #define TCG_TARGET_HAS_not_vec 1 #define TCG_TARGET_HAS_neg_vec 1 #define TCG_TARGET_HAS_abs_vec 1 diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h index 78774d1005..91ceb0e1da 100644 --- a/tcg/i386/tcg-target-con-set.h +++ b/tcg/i386/tcg-target-con-set.h @@ -45,6 +45,7 @@ C_O1_I2(r, r, rI) C_O1_I2(x, x, x) C_N1_I2(r, r, r) C_N1_I2(r, r, rW) +C_O1_I3(x, 0, x, x) C_O1_I3(x, x, x, x) C_O1_I4(r, r, re, r, 0) C_O1_I4(r, r, r, ri, ri) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index faa15eecab..b5c6159853 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -171,6 +171,10 @@ bool have_bmi1; bool have_popcnt; bool have_avx1; bool have_avx2; +bool 
have_avx512bw; +bool have_avx512dq; +bool have_avx512vbmi2; +bool have_avx512vl; bool have_movbe; #ifdef CONFIG_CPUID_H @@ -258,6 +262,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define P_SIMDF3 0x20000 /* 0xf3 opcode prefix */ #define P_SIMDF2 0x40000 /* 0xf2 opcode prefix */ #define P_VEXL 0x80000 /* Set VEX.L = 1 */ +#define P_EVEX 0x100000 /* Requires EVEX encoding */ #define OPC_ARITH_EvIz (0x81) #define OPC_ARITH_EvIb (0x83) @@ -308,6 +313,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_PABSB (0x1c | P_EXT38 | P_DATA16) #define OPC_PABSW (0x1d | P_EXT38 | P_DATA16) #define OPC_PABSD (0x1e | P_EXT38 | P_DATA16) +#define OPC_VPABSQ (0x1f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) #define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16) #define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16) #define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16) @@ -334,15 +340,19 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16) #define OPC_PMAXSW (0xee | P_EXT | P_DATA16) #define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16) +#define OPC_VPMAXSQ (0x3d | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) #define OPC_PMAXUB (0xde | P_EXT | P_DATA16) #define OPC_PMAXUW (0x3e | P_EXT38 | P_DATA16) #define OPC_PMAXUD (0x3f | P_EXT38 | P_DATA16) +#define OPC_VPMAXUQ (0x3f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) #define OPC_PMINSB (0x38 | P_EXT38 | P_DATA16) #define OPC_PMINSW (0xea | P_EXT | P_DATA16) #define OPC_PMINSD (0x39 | P_EXT38 | P_DATA16) +#define OPC_VPMINSQ (0x39 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) #define OPC_PMINUB (0xda | P_EXT | P_DATA16) #define OPC_PMINUW (0x3a | P_EXT38 | P_DATA16) #define OPC_PMINUD (0x3b | P_EXT38 | P_DATA16) +#define OPC_VPMINUQ (0x3b | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) #define OPC_PMOVSXBW (0x20 | P_EXT38 | P_DATA16) #define OPC_PMOVSXWD (0x23 | P_EXT38 | P_DATA16) #define OPC_PMOVSXDQ (0x25 | P_EXT38 | P_DATA16) @@ -351,19 +361,21 @@ static 
bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_PMOVZXDQ (0x35 | P_EXT38 | P_DATA16) #define OPC_PMULLW (0xd5 | P_EXT | P_DATA16) #define OPC_PMULLD (0x40 | P_EXT38 | P_DATA16) +#define OPC_VPMULLQ (0x40 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) #define OPC_POR (0xeb | P_EXT | P_DATA16) #define OPC_PSHUFB (0x00 | P_EXT38 | P_DATA16) #define OPC_PSHUFD (0x70 | P_EXT | P_DATA16) #define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2) #define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3) #define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */ -#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */ +#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */ #define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */ #define OPC_PSLLW (0xf1 | P_EXT | P_DATA16) #define OPC_PSLLD (0xf2 | P_EXT | P_DATA16) #define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16) #define OPC_PSRAW (0xe1 | P_EXT | P_DATA16) #define OPC_PSRAD (0xe2 | P_EXT | P_DATA16) +#define OPC_VPSRAQ (0x72 | P_EXT | P_DATA16 | P_VEXW | P_EVEX) #define OPC_PSRLW (0xd1 | P_EXT | P_DATA16) #define OPC_PSRLD (0xd2 | P_EXT | P_DATA16) #define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16) @@ -414,11 +426,29 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16) #define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW) #define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL) +#define OPC_VPROLVD (0x15 | P_EXT38 | P_DATA16 | P_EVEX) +#define OPC_VPROLVQ (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPRORVD (0x14 | P_EXT38 | P_DATA16 | P_EVEX) +#define OPC_VPRORVQ (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSHLDW (0x70 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSHLDD (0x71 | P_EXT3A | P_DATA16 | P_EVEX) +#define OPC_VPSHLDQ (0x71 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSHLDVW (0x70 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSHLDVD (0x71 | P_EXT38 | P_DATA16 | 
P_EVEX) +#define OPC_VPSHLDVQ (0x71 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSHRDVW (0x72 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSHRDVD (0x73 | P_EXT38 | P_DATA16 | P_EVEX) +#define OPC_VPSHRDVQ (0x73 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) #define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16) #define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW) +#define OPC_VPSRAVW (0x11 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) #define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16) +#define OPC_VPSRAVQ (0x46 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPSRLVW (0x10 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX) #define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16) #define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW) +#define OPC_VPTERNLOGQ (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) #define OPC_VZEROUPPER (0x77 | P_EXT) #define OPC_XCHG_ax_r32 (0x90) @@ -622,9 +652,57 @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v, tcg_out8(s, opc); } +static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v, + int rm, int index) +{ + /* The entire 4-byte evex prefix; with R' and V' set. 
*/ + uint32_t p = 0x08041062; + int mm, pp; + + tcg_debug_assert(have_avx512vl); + + /* EVEX.mm */ + if (opc & P_EXT3A) { + mm = 3; + } else if (opc & P_EXT38) { + mm = 2; + } else if (opc & P_EXT) { + mm = 1; + } else { + g_assert_not_reached(); + } + + /* EVEX.pp */ + if (opc & P_DATA16) { + pp = 1; /* 0x66 */ + } else if (opc & P_SIMDF3) { + pp = 2; /* 0xf3 */ + } else if (opc & P_SIMDF2) { + pp = 3; /* 0xf2 */ + } else { + pp = 0; + } + + p = deposit32(p, 8, 2, mm); + p = deposit32(p, 13, 1, (rm & 8) == 0); /* EVEX.RXB.B */ + p = deposit32(p, 14, 1, (index & 8) == 0); /* EVEX.RXB.X */ + p = deposit32(p, 15, 1, (r & 8) == 0); /* EVEX.RXB.R */ + p = deposit32(p, 16, 2, pp); + p = deposit32(p, 19, 4, ~v); + p = deposit32(p, 23, 1, (opc & P_VEXW) != 0); + p = deposit32(p, 29, 2, (opc & P_VEXL) != 0); + + tcg_out32(s, p); + tcg_out8(s, opc); +} + static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm) { - tcg_out_vex_opc(s, opc, r, v, rm, 0); + if (opc & P_EVEX) { + tcg_out_evex_opc(s, opc, r, v, rm, 0); + } else { + tcg_out_vex_opc(s, opc, r, v, rm, 0); + } tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm)); } @@ -2746,7 +2824,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2 }; static int const mul_insn[4] = { - OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_UD2 + OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_VPMULLQ }; static int const shift_imm_insn[4] = { OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib @@ -2770,28 +2848,31 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2 }; static int const smin_insn[4] = { - OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_UD2 + OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_VPMINSQ }; static int const smax_insn[4] = { - OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_UD2 + OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ }; static int const umin_insn[4] = { - OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_UD2 + OPC_PMINUB, OPC_PMINUW, 
OPC_PMINUD, OPC_VPMINUQ }; static int const umax_insn[4] = { - OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2 + OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ + }; + static int const rotlv_insn[4] = { + OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ + }; + static int const rotrv_insn[4] = { + OPC_UD2, OPC_UD2, OPC_VPRORVD, OPC_VPRORVQ }; static int const shlv_insn[4] = { - /* TODO: AVX512 adds support for MO_16. */ - OPC_UD2, OPC_UD2, OPC_VPSLLVD, OPC_VPSLLVQ + OPC_UD2, OPC_VPSLLVW, OPC_VPSLLVD, OPC_VPSLLVQ }; static int const shrv_insn[4] = { - /* TODO: AVX512 adds support for MO_16. */ - OPC_UD2, OPC_UD2, OPC_VPSRLVD, OPC_VPSRLVQ + OPC_UD2, OPC_VPSRLVW, OPC_VPSRLVD, OPC_VPSRLVQ }; static int const sarv_insn[4] = { - /* TODO: AVX512 adds support for MO_16, MO_64. */ - OPC_UD2, OPC_UD2, OPC_VPSRAVD, OPC_UD2 + OPC_UD2, OPC_VPSRAVW, OPC_VPSRAVD, OPC_VPSRAVQ }; static int const shls_insn[4] = { OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ @@ -2800,16 +2881,24 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ }; static int const sars_insn[4] = { - OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_UD2 + OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_VPSRAQ + }; + static int const vpshldi_insn[4] = { + OPC_UD2, OPC_VPSHLDW, OPC_VPSHLDD, OPC_VPSHLDQ + }; + static int const vpshldv_insn[4] = { + OPC_UD2, OPC_VPSHLDVW, OPC_VPSHLDVD, OPC_VPSHLDVQ + }; + static int const vpshrdv_insn[4] = { + OPC_UD2, OPC_VPSHRDVW, OPC_VPSHRDVD, OPC_VPSHRDVQ }; static int const abs_insn[4] = { - /* TODO: AVX512 adds support for MO_64. 
*/ - OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2 + OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_VPABSQ }; TCGType type = vecl + TCG_TYPE_V64; int insn, sub; - TCGArg a0, a1, a2; + TCGArg a0, a1, a2, a3; a0 = args[0]; a1 = args[1]; @@ -2867,6 +2956,12 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_sarv_vec: insn = sarv_insn[vece]; goto gen_simd; + case INDEX_op_rotlv_vec: + insn = rotlv_insn[vece]; + goto gen_simd; + case INDEX_op_rotrv_vec: + insn = rotrv_insn[vece]; + goto gen_simd; case INDEX_op_shls_vec: insn = shls_insn[vece]; goto gen_simd; @@ -2888,6 +2983,16 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_x86_packus_vec: insn = packus_insn[vece]; goto gen_simd; + case INDEX_op_x86_vpshldv_vec: + insn = vpshldv_insn[vece]; + a1 = a2; + a2 = args[3]; + goto gen_simd; + case INDEX_op_x86_vpshrdv_vec: + insn = vpshrdv_insn[vece]; + a1 = a2; + a2 = args[3]; + goto gen_simd; #if TCG_TARGET_REG_BITS == 32 case INDEX_op_dup2_vec: /* First merge the two 32-bit inputs to a single 64-bit element. 
*/ @@ -2931,17 +3036,30 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_shli_vec: + insn = shift_imm_insn[vece]; sub = 6; goto gen_shift; case INDEX_op_shri_vec: + insn = shift_imm_insn[vece]; sub = 2; goto gen_shift; case INDEX_op_sari_vec: - tcg_debug_assert(vece != MO_64); + if (vece == MO_64) { + insn = OPC_PSHIFTD_Ib | P_VEXW | P_EVEX; + } else { + insn = shift_imm_insn[vece]; + } sub = 4; + goto gen_shift; + case INDEX_op_rotli_vec: + insn = OPC_PSHIFTD_Ib | P_EVEX; /* VPROL[DQ] */ + if (vece == MO_64) { + insn |= P_VEXW; + } + sub = 1; + goto gen_shift; gen_shift: tcg_debug_assert(vece != MO_8); - insn = shift_imm_insn[vece]; if (type == TCG_TYPE_V256) { insn |= P_VEXL; } @@ -2977,7 +3095,51 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, insn = OPC_VPERM2I128; sub = args[3]; goto gen_simd_imm8; + case INDEX_op_x86_vpshldi_vec: + insn = vpshldi_insn[vece]; + sub = args[3]; + goto gen_simd_imm8; + + case INDEX_op_not_vec: + insn = OPC_VPTERNLOGQ; + a2 = a1; + sub = 0x33; /* !B */ + goto gen_simd_imm8; + case INDEX_op_nor_vec: + insn = OPC_VPTERNLOGQ; + sub = 0x11; /* norCB */ + goto gen_simd_imm8; + case INDEX_op_nand_vec: + insn = OPC_VPTERNLOGQ; + sub = 0x77; /* nandCB */ + goto gen_simd_imm8; + case INDEX_op_eqv_vec: + insn = OPC_VPTERNLOGQ; + sub = 0x99; /* xnorCB */ + goto gen_simd_imm8; + case INDEX_op_orc_vec: + insn = OPC_VPTERNLOGQ; + sub = 0xdd; /* orB!C */ + goto gen_simd_imm8; + + case INDEX_op_bitsel_vec: + insn = OPC_VPTERNLOGQ; + a3 = args[3]; + if (a0 == a1) { + a1 = a2; + a2 = a3; + sub = 0xca; /* A?B:C */ + } else if (a0 == a2) { + a2 = a3; + sub = 0xe2; /* B?A:C */ + } else { + tcg_out_mov(s, type, a0, a3); + sub = 0xb8; /* B?C:A */ + } + goto gen_simd_imm8; + gen_simd_imm8: + tcg_debug_assert(insn != OPC_UD2); if (type == TCG_TYPE_V256) { insn |= P_VEXL; } @@ -3196,6 +3358,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_or_vec: case INDEX_op_xor_vec: case 
INDEX_op_andc_vec: + case INDEX_op_orc_vec: + case INDEX_op_nand_vec: + case INDEX_op_nor_vec: + case INDEX_op_eqv_vec: case INDEX_op_ssadd_vec: case INDEX_op_usadd_vec: case INDEX_op_sssub_vec: @@ -3207,10 +3373,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: case INDEX_op_sarv_vec: + case INDEX_op_rotlv_vec: + case INDEX_op_rotrv_vec: case INDEX_op_shls_vec: case INDEX_op_shrs_vec: case INDEX_op_sars_vec: - case INDEX_op_rotls_vec: case INDEX_op_cmp_vec: case INDEX_op_x86_shufps_vec: case INDEX_op_x86_blend_vec: @@ -3219,6 +3386,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_x86_vperm2i128_vec: case INDEX_op_x86_punpckl_vec: case INDEX_op_x86_punpckh_vec: + case INDEX_op_x86_vpshldi_vec: #if TCG_TARGET_REG_BITS == 32 case INDEX_op_dup2_vec: #endif @@ -3226,12 +3394,19 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_abs_vec: case INDEX_op_dup_vec: + case INDEX_op_not_vec: case INDEX_op_shli_vec: case INDEX_op_shri_vec: case INDEX_op_sari_vec: + case INDEX_op_rotli_vec: case INDEX_op_x86_psrldq_vec: return C_O1_I1(x, x); + case INDEX_op_x86_vpshldv_vec: + case INDEX_op_x86_vpshrdv_vec: + return C_O1_I3(x, 0, x, x); + + case INDEX_op_bitsel_vec: case INDEX_op_x86_vpblendvb_vec: return C_O1_I3(x, x, x, x); @@ -3249,53 +3424,96 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_or_vec: case INDEX_op_xor_vec: case INDEX_op_andc_vec: + case INDEX_op_orc_vec: + case INDEX_op_nand_vec: + case INDEX_op_nor_vec: + case INDEX_op_eqv_vec: + case INDEX_op_not_vec: + case INDEX_op_bitsel_vec: return 1; - case INDEX_op_rotli_vec: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: return -1; + case INDEX_op_rotli_vec: + return have_avx512vl && vece >= MO_32 ? 1 : -1; + case INDEX_op_shli_vec: case INDEX_op_shri_vec: /* We must expand the operation for MO_8. */ return vece == MO_8 ? 
-1 : 1; case INDEX_op_sari_vec: - /* We must expand the operation for MO_8. */ - if (vece == MO_8) { + switch (vece) { + case MO_8: return -1; - } - /* We can emulate this for MO_64, but it does not pay off - unless we're producing at least 4 values. */ - if (vece == MO_64) { + case MO_16: + case MO_32: + return 1; + case MO_64: + if (have_avx512vl) { + return 1; + } + /* + * We can emulate this for MO_64, but it does not pay off + * unless we're producing at least 4 values. + */ return type >= TCG_TYPE_V256 ? -1 : 0; } - return 1; + return 0; case INDEX_op_shls_vec: case INDEX_op_shrs_vec: return vece >= MO_16; case INDEX_op_sars_vec: - return vece >= MO_16 && vece <= MO_32; + switch (vece) { + case MO_16: + case MO_32: + return 1; + case MO_64: + return have_avx512vl; + } + return 0; case INDEX_op_rotls_vec: return vece >= MO_16 ? -1 : 0; case INDEX_op_shlv_vec: case INDEX_op_shrv_vec: - return have_avx2 && vece >= MO_32; + switch (vece) { + case MO_16: + return have_avx512bw; + case MO_32: + case MO_64: + return have_avx2; + } + return 0; case INDEX_op_sarv_vec: - return have_avx2 && vece == MO_32; + switch (vece) { + case MO_16: + return have_avx512bw; + case MO_32: + return have_avx2; + case MO_64: + return have_avx512vl; + } + return 0; case INDEX_op_rotlv_vec: case INDEX_op_rotrv_vec: - return have_avx2 && vece >= MO_32 ? -1 : 0; + switch (vece) { + case MO_16: + return have_avx512vbmi2 ? -1 : 0; + case MO_32: + case MO_64: + return have_avx512vl ? 1 : have_avx2 ? -1 : 0; + } + return 0; case INDEX_op_mul_vec: - if (vece == MO_8) { - /* We can expand the operation for MO_8. 
*/ + switch (vece) { + case MO_8: return -1; - } - if (vece == MO_64) { - return 0; + case MO_64: + return have_avx512dq; } return 1; @@ -3309,7 +3527,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_umin_vec: case INDEX_op_umax_vec: case INDEX_op_abs_vec: - return vece <= MO_32; + return vece <= MO_32 || have_avx512vl; default: return 0; @@ -3427,6 +3645,12 @@ static void expand_vec_rotli(TCGType type, unsigned vece, return; } + if (have_avx512vbmi2) { + vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece, + tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm); + return; + } + t = tcg_temp_new_vec(type); tcg_gen_shli_vec(vece, t, v1, imm); tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm); @@ -3434,31 +3658,19 @@ static void expand_vec_rotli(TCGType type, unsigned vece, tcg_temp_free_vec(t); } -static void expand_vec_rotls(TCGType type, unsigned vece, - TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh) +static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0, + TCGv_vec v1, TCGv_vec sh, bool right) { - TCGv_i32 rsh; TCGv_vec t; - tcg_debug_assert(vece != MO_8); + if (have_avx512vbmi2) { + vec_gen_4(right ? 
INDEX_op_x86_vpshrdv_vec : INDEX_op_x86_vpshldv_vec, + type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1), + tcgv_vec_arg(v1), tcgv_vec_arg(sh)); + return; + } t = tcg_temp_new_vec(type); - rsh = tcg_temp_new_i32(); - - tcg_gen_neg_i32(rsh, lsh); - tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1); - tcg_gen_shls_vec(vece, t, v1, lsh); - tcg_gen_shrs_vec(vece, v0, v1, rsh); - tcg_gen_or_vec(vece, v0, v0, t); - tcg_temp_free_vec(t); - tcg_temp_free_i32(rsh); -} - -static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0, - TCGv_vec v1, TCGv_vec sh, bool right) -{ - TCGv_vec t = tcg_temp_new_vec(type); - tcg_gen_dupi_vec(vece, t, 8 << vece); tcg_gen_sub_vec(vece, t, t, sh); if (right) { @@ -3472,6 +3684,35 @@ static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0, tcg_temp_free_vec(t); } +static void expand_vec_rotls(TCGType type, unsigned vece, + TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh) +{ + TCGv_vec t = tcg_temp_new_vec(type); + + tcg_debug_assert(vece != MO_8); + + if (vece >= MO_32 ? 
have_avx512vl : have_avx512vbmi2) { + tcg_gen_dup_i32_vec(vece, t, lsh); + if (vece >= MO_32) { + tcg_gen_rotlv_vec(vece, v0, v1, t); + } else { + expand_vec_rotv(type, vece, v0, v1, t, false); + } + } else { + TCGv_i32 rsh = tcg_temp_new_i32(); + + tcg_gen_neg_i32(rsh, lsh); + tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1); + tcg_gen_shls_vec(vece, t, v1, lsh); + tcg_gen_shrs_vec(vece, v0, v1, rsh); + tcg_gen_or_vec(vece, v0, v0, t); + + tcg_temp_free_i32(rsh); + } + + tcg_temp_free_vec(t); +} + static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0, TCGv_vec v1, TCGv_vec v2) { @@ -3567,28 +3808,28 @@ static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0, fixup = NEED_SWAP | NEED_INV; break; case TCG_COND_LEU: - if (vece <= MO_32) { + if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) { fixup = NEED_UMIN; } else { fixup = NEED_BIAS | NEED_INV; } break; case TCG_COND_GTU: - if (vece <= MO_32) { + if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) { fixup = NEED_UMIN | NEED_INV; } else { fixup = NEED_BIAS; } break; case TCG_COND_GEU: - if (vece <= MO_32) { + if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) { fixup = NEED_UMAX; } else { fixup = NEED_BIAS | NEED_SWAP | NEED_INV; } break; case TCG_COND_LTU: - if (vece <= MO_32) { + if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) { fixup = NEED_UMAX | NEED_INV; } else { fixup = NEED_BIAS | NEED_SWAP; @@ -3839,12 +4080,12 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count) static void tcg_target_init(TCGContext *s) { #ifdef CONFIG_CPUID_H - unsigned a, b, c, d, b7 = 0; + unsigned a, b, c, d, b7 = 0, c7 = 0; unsigned max = __get_cpuid_max(0, 0); if (max >= 7) { /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. 
*/ - __cpuid_count(7, 0, a, b7, c, d); + __cpuid_count(7, 0, a, b7, c7, d); have_bmi1 = (b7 & bit_BMI) != 0; have_bmi2 = (b7 & bit_BMI2) != 0; } @@ -3874,6 +4115,22 @@ static void tcg_target_init(TCGContext *s) if ((xcrl & 6) == 6) { have_avx1 = (c & bit_AVX) != 0; have_avx2 = (b7 & bit_AVX2) != 0; + + /* + * There are interesting instructions in AVX512, so long + * as we have AVX512VL, which indicates support for EVEX + * on sizes smaller than 512 bits. We are required to + * check that OPMASK and all extended ZMM state are enabled + * even if we're not using them -- the insns will fault. + */ + if ((xcrl & 0xe0) == 0xe0 + && (b7 & bit_AVX512F) + && (b7 & bit_AVX512VL)) { + have_avx512vl = true; + have_avx512bw = (b7 & bit_AVX512BW) != 0; + have_avx512dq = (b7 & bit_AVX512DQ) != 0; + have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0; + } } } } diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 3b2c9437a0..00fcbe297d 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -103,6 +103,10 @@ extern bool have_bmi1; extern bool have_popcnt; extern bool have_avx1; extern bool have_avx2; +extern bool have_avx512bw; +extern bool have_avx512dq; +extern bool have_avx512vbmi2; +extern bool have_avx512vl; extern bool have_movbe; /* optional instructions */ @@ -184,20 +188,23 @@ extern bool have_movbe; #define TCG_TARGET_HAS_v256 have_avx2 #define TCG_TARGET_HAS_andc_vec 1 -#define TCG_TARGET_HAS_orc_vec 0 -#define TCG_TARGET_HAS_not_vec 0 +#define TCG_TARGET_HAS_orc_vec have_avx512vl +#define TCG_TARGET_HAS_nand_vec have_avx512vl +#define TCG_TARGET_HAS_nor_vec have_avx512vl +#define TCG_TARGET_HAS_eqv_vec have_avx512vl +#define TCG_TARGET_HAS_not_vec have_avx512vl #define TCG_TARGET_HAS_neg_vec 0 #define TCG_TARGET_HAS_abs_vec 1 -#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_roti_vec have_avx512vl #define TCG_TARGET_HAS_rots_vec 0 -#define TCG_TARGET_HAS_rotv_vec 0 +#define TCG_TARGET_HAS_rotv_vec have_avx512vl #define TCG_TARGET_HAS_shi_vec 
1 #define TCG_TARGET_HAS_shs_vec 1 #define TCG_TARGET_HAS_shv_vec have_avx2 #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 1 #define TCG_TARGET_HAS_minmax_vec 1 -#define TCG_TARGET_HAS_bitsel_vec 0 +#define TCG_TARGET_HAS_bitsel_vec have_avx512vl #define TCG_TARGET_HAS_cmpsel_vec -1 #define TCG_TARGET_deposit_i32_valid(ofs, len) \ diff --git a/tcg/i386/tcg-target.opc.h b/tcg/i386/tcg-target.opc.h index 1312941800..b5f403e35e 100644 --- a/tcg/i386/tcg-target.opc.h +++ b/tcg/i386/tcg-target.opc.h @@ -33,3 +33,6 @@ DEF(x86_psrldq_vec, 1, 1, 1, IMPLVEC) DEF(x86_vperm2i128_vec, 1, 2, 1, IMPLVEC) DEF(x86_punpckl_vec, 1, 2, 0, IMPLVEC) DEF(x86_punpckh_vec, 1, 2, 0, IMPLVEC) +DEF(x86_vpshldi_vec, 1, 2, 1, IMPLVEC) +DEF(x86_vpshldv_vec, 1, 3, 0, IMPLVEC) +DEF(x86_vpshrdv_vec, 1, 3, 0, IMPLVEC) diff --git a/tcg/optimize.c b/tcg/optimize.c index e573000951..ae081ab29c 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -359,13 +359,13 @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) CASE_OP_32_64_VEC(orc): return x | ~y; - CASE_OP_32_64(eqv): + CASE_OP_32_64_VEC(eqv): return ~(x ^ y); - CASE_OP_32_64(nand): + CASE_OP_32_64_VEC(nand): return ~(x & y); - CASE_OP_32_64(nor): + CASE_OP_32_64_VEC(nor): return ~(x | y); case INDEX_op_clz_i32: @@ -552,10 +552,10 @@ static bool do_constant_folding_cond_eq(TCGCond c) static int do_constant_folding_cond(TCGType type, TCGArg x, TCGArg y, TCGCond c) { - uint64_t xv = arg_info(x)->val; - uint64_t yv = arg_info(y)->val; - if (arg_is_const(x) && arg_is_const(y)) { + uint64_t xv = arg_info(x)->val; + uint64_t yv = arg_info(y)->val; + switch (type) { case TCG_TYPE_I32: return do_constant_folding_cond_32(xv, yv, c); @@ -567,7 +567,7 @@ static int do_constant_folding_cond(TCGType type, TCGArg x, } } else if (args_are_copies(x, y)) { return do_constant_folding_cond_eq(c); - } else if (arg_is_const(y) && yv == 0) { + } else if (arg_is_const(y) && arg_info(y)->val == 0) { switch (c) { case 
TCG_COND_LTU: return 0; @@ -2119,7 +2119,7 @@ void tcg_optimize(TCGContext *s) case INDEX_op_dup2_vec: done = fold_dup2(&ctx, op); break; - CASE_OP_32_64(eqv): + CASE_OP_32_64_VEC(eqv): done = fold_eqv(&ctx, op); break; CASE_OP_32_64(extract): @@ -2170,13 +2170,13 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(mulu2): done = fold_multiply2(&ctx, op); break; - CASE_OP_32_64(nand): + CASE_OP_32_64_VEC(nand): done = fold_nand(&ctx, op); break; CASE_OP_32_64(neg): done = fold_neg(&ctx, op); break; - CASE_OP_32_64(nor): + CASE_OP_32_64_VEC(nor): done = fold_nor(&ctx, op); break; CASE_OP_32_64_VEC(not): diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 69d22e08cb..1f3c5c171c 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3122,6 +3122,9 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_xor_vec: case INDEX_op_andc_vec: case INDEX_op_not_vec: + case INDEX_op_nor_vec: + case INDEX_op_eqv_vec: + case INDEX_op_nand_vec: return 1; case INDEX_op_orc_vec: return have_isa_2_07; @@ -3400,6 +3403,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_orc_vec: insn = VORC; break; + case INDEX_op_nand_vec: + insn = VNAND; + break; + case INDEX_op_nor_vec: + insn = VNOR; + break; + case INDEX_op_eqv_vec: + insn = VEQV; + break; case INDEX_op_cmp_vec: switch (args[3]) { @@ -3787,6 +3799,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_xor_vec: case INDEX_op_andc_vec: case INDEX_op_orc_vec: + case INDEX_op_nor_vec: + case INDEX_op_eqv_vec: + case INDEX_op_nand_vec: case INDEX_op_cmp_vec: case INDEX_op_ssadd_vec: case INDEX_op_sssub_vec: diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index c775c97b61..e6cf72503f 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -162,6 +162,9 @@ extern bool have_vsx; #define TCG_TARGET_HAS_andc_vec 1 #define TCG_TARGET_HAS_orc_vec have_isa_2_07 +#define TCG_TARGET_HAS_nand_vec have_isa_2_07 
+#define TCG_TARGET_HAS_nor_vec 1 +#define TCG_TARGET_HAS_eqv_vec have_isa_2_07 #define TCG_TARGET_HAS_not_vec 1 #define TCG_TARGET_HAS_neg_vec have_isa_3_00 #define TCG_TARGET_HAS_abs_vec 0 diff --git a/tcg/region.c b/tcg/region.c index 72afb35738..97ca5291d5 100644 --- a/tcg/region.c +++ b/tcg/region.c @@ -26,6 +26,7 @@ #include "qemu/units.h" #include "qemu/madvise.h" #include "qemu/mprotect.h" +#include "qemu/memalign.h" #include "qemu/cacheinfo.h" #include "qapi/error.h" #include "exec/exec-all.h" diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index d56c1e51e4..6e65828c09 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -290,7 +290,9 @@ typedef enum S390Opcode { VRRc_VMXL = 0xe7fd, VRRc_VN = 0xe768, VRRc_VNC = 0xe769, + VRRc_VNN = 0xe76e, VRRc_VNO = 0xe76b, + VRRc_VNX = 0xe76c, VRRc_VO = 0xe76a, VRRc_VOC = 0xe76f, VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */ @@ -2805,6 +2807,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_xor_vec: tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0); break; + case INDEX_op_nand_vec: + tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0); + break; + case INDEX_op_nor_vec: + tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0); + break; + case INDEX_op_eqv_vec: + tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0); + break; case INDEX_op_shli_vec: tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece); @@ -2901,7 +2912,10 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_and_vec: case INDEX_op_andc_vec: case INDEX_op_bitsel_vec: + case INDEX_op_eqv_vec: + case INDEX_op_nand_vec: case INDEX_op_neg_vec: + case INDEX_op_nor_vec: case INDEX_op_not_vec: case INDEX_op_or_vec: case INDEX_op_orc_vec: @@ -3246,6 +3260,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_or_vec: case INDEX_op_orc_vec: case INDEX_op_xor_vec: + case INDEX_op_nand_vec: + case INDEX_op_nor_vec: + case INDEX_op_eqv_vec: case INDEX_op_cmp_vec: case 
INDEX_op_mul_vec: case INDEX_op_rotlv_vec: diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 69217d995b..23e2063667 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -145,6 +145,9 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_andc_vec 1 #define TCG_TARGET_HAS_orc_vec HAVE_FACILITY(VECTOR_ENH1) +#define TCG_TARGET_HAS_nand_vec HAVE_FACILITY(VECTOR_ENH1) +#define TCG_TARGET_HAS_nor_vec 1 +#define TCG_TARGET_HAS_eqv_vec HAVE_FACILITY(VECTOR_ENH1) #define TCG_TARGET_HAS_not_vec 1 #define TCG_TARGET_HAS_neg_vec 1 #define TCG_TARGET_HAS_abs_vec 1 diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c index faf30f9cdd..463dabf515 100644 --- a/tcg/tcg-op-vec.c +++ b/tcg/tcg-op-vec.c @@ -371,23 +371,32 @@ void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend supports it. */ - tcg_gen_and_vec(0, r, a, b); - tcg_gen_not_vec(0, r, r); + if (TCG_TARGET_HAS_nand_vec) { + vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b); + } else { + tcg_gen_and_vec(0, r, a, b); + tcg_gen_not_vec(0, r, r); + } } void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend supports it. */ - tcg_gen_or_vec(0, r, a, b); - tcg_gen_not_vec(0, r, r); + if (TCG_TARGET_HAS_nor_vec) { + vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b); + } else { + tcg_gen_or_vec(0, r, a, b); + tcg_gen_not_vec(0, r, r); + } } void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { - /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend supports it. 
*/ - tcg_gen_xor_vec(0, r, a, b); - tcg_gen_not_vec(0, r, r); + if (TCG_TARGET_HAS_eqv_vec) { + vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b); + } else { + tcg_gen_xor_vec(0, r, a, b); + tcg_gen_not_vec(0, r, r); + } } static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc) @@ -1407,6 +1407,12 @@ bool tcg_op_supported(TCGOpcode op) return have_vec && TCG_TARGET_HAS_andc_vec; case INDEX_op_orc_vec: return have_vec && TCG_TARGET_HAS_orc_vec; + case INDEX_op_nand_vec: + return have_vec && TCG_TARGET_HAS_nand_vec; + case INDEX_op_nor_vec: + return have_vec && TCG_TARGET_HAS_nor_vec; + case INDEX_op_eqv_vec: + return have_vec && TCG_TARGET_HAS_eqv_vec; case INDEX_op_mul_vec: return have_vec && TCG_TARGET_HAS_mul_vec; case INDEX_op_shli_vec: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 9ff1fa0832..98337c567a 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -197,7 +197,7 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R0, }; -#if MAX_OPC_PARAM_IARGS != 6 +#if MAX_OPC_PARAM_IARGS != 7 # error Fix needed, number of supported input arguments changed! 
#endif diff --git a/tests/avocado/avocado_qemu/__init__.py b/tests/avocado/avocado_qemu/__init__.py index 75063c0c30..9b056b5ce5 100644 --- a/tests/avocado/avocado_qemu/__init__.py +++ b/tests/avocado/avocado_qemu/__init__.py @@ -603,6 +603,8 @@ class LinuxTest(LinuxSSHMixIn, QemuSystemTest): try: cloudinit_iso = os.path.join(self.workdir, 'cloudinit.iso') self.phone_home_port = network.find_free_port() + if not self.phone_home_port: + self.cancel('Failed to get a free port') pubkey_content = None if ssh_pubkey: with open(ssh_pubkey) as pubkey: diff --git a/tests/avocado/boot_linux.py b/tests/avocado/boot_linux.py index ab19146d1e..ee584d2fdf 100644 --- a/tests/avocado/boot_linux.py +++ b/tests/avocado/boot_linux.py @@ -79,6 +79,7 @@ class BootLinuxAarch64(LinuxTest): """ self.require_accelerator("tcg") self.vm.add_args("-accel", "tcg") + self.vm.add_args("-cpu", "max,lpa2=off") self.vm.add_args("-machine", "virt,gic-version=2") self.add_common_args() self.launch_and_wait(set_up_ssh_connection=False) @@ -91,6 +92,7 @@ class BootLinuxAarch64(LinuxTest): """ self.require_accelerator("tcg") self.vm.add_args("-accel", "tcg") + self.vm.add_args("-cpu", "max,lpa2=off") self.vm.add_args("-machine", "virt,gic-version=3") self.add_common_args() self.launch_and_wait(set_up_ssh_connection=False) diff --git a/tests/bench/atomic_add-bench.c b/tests/bench/atomic_add-bench.c index f05471ab45..8a6faad6ec 100644 --- a/tests/bench/atomic_add-bench.c +++ b/tests/bench/atomic_add-bench.c @@ -2,6 +2,7 @@ #include "qemu/thread.h" #include "qemu/host-utils.h" #include "qemu/processor.h" +#include "qemu/memalign.h" struct thread_info { uint64_t r; diff --git a/tests/bench/qht-bench.c b/tests/bench/qht-bench.c index 2e5b70ccd0..8afe161d10 100644 --- a/tests/bench/qht-bench.c +++ b/tests/bench/qht-bench.c @@ -10,6 +10,7 @@ #include "qemu/qht.h" #include "qemu/rcu.h" #include "qemu/xxhash.h" +#include "qemu/memalign.h" struct thread_stats { size_t rd; diff --git a/tests/check-block.sh 
b/tests/check-block.sh index 18f7433901..f59496396c 100755 --- a/tests/check-block.sh +++ b/tests/check-block.sh @@ -48,18 +48,6 @@ if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then skip "bash version too old ==> Not running the qemu-iotests." fi -if ! (sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then - if ! command -v gsed >/dev/null 2>&1; then - skip "GNU sed not available ==> Not running the qemu-iotests." - fi -else - # Double-check that we're not using BusyBox' sed which says - # that "This is not GNU sed version 4.0" ... - if sed --version | grep -q 'not GNU sed' ; then - skip "BusyBox sed not supported ==> Not running the qemu-iotests." - fi -fi - cd tests/qemu-iotests # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 index 6af5ab9e76..0e1cfd7e49 100755 --- a/tests/qemu-iotests/040 +++ b/tests/qemu-iotests/040 @@ -744,6 +744,7 @@ class TestCommitWithFilters(iotests.QMPTestCase): pattern_file) self.assertFalse('Pattern verification failed' in result) + @iotests.skip_if_unsupported(['throttle']) def setUp(self): qemu_img('create', '-f', iotests.imgfmt, self.img0, '64M') qemu_img('create', '-f', iotests.imgfmt, self.img1, '64M') diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185 index f2ec5c5ceb..8b1143dc16 100755 --- a/tests/qemu-iotests/185 +++ b/tests/qemu-iotests/185 @@ -33,6 +33,12 @@ _cleanup() _rm_test_img "${TEST_IMG}.copy" _cleanup_test_img _cleanup_qemu + + if [ -f "$TEST_DIR/qsd.pid" ]; then + kill -SIGKILL "$(cat "$TEST_DIR/qsd.pid")" + rm -f "$TEST_DIR/qsd.pid" + fi + rm -f "$SOCK_DIR/qsd.sock" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -45,7 +51,7 @@ _supported_fmt qcow2 _supported_proto file _supported_os Linux -size=64M +size=$((64 * 1048576)) TEST_IMG="${TEST_IMG}.base" _make_test_img $size echo @@ -216,6 +222,188 @@ wait=1 _cleanup_qemu | grep -v 'JOB_STATUS_CHANGE' _check_test_img +echo +echo === Start mirror to throttled QSD 
and exit qemu === +echo + +# Mirror to a throttled QSD instance (so that qemu cannot drain the +# throttling), wait for READY, then write some data to the device, +# and then quit qemu. +# (qemu should force-cancel the job and not wait for the data to be +# written to the target.) + +_make_test_img $size + +# Will be used by this and the next case +set_up_throttled_qsd() { + $QSD \ + --object throttle-group,id=thrgr,limits.bps-total=1048576 \ + --blockdev null-co,node-name=null,size=$size \ + --blockdev throttle,node-name=throttled,throttle-group=thrgr,file=null \ + --nbd-server addr.type=unix,addr.path="$SOCK_DIR/qsd.sock" \ + --export nbd,id=exp,node-name=throttled,name=target,writable=true \ + --pidfile "$TEST_DIR/qsd.pid" \ + --daemonize +} + +set_up_throttled_qsd + +# Need a virtio-blk device so that qemu-io writes will not block the monitor +_launch_qemu \ + --blockdev file,node-name=source-proto,filename="$TEST_IMG" \ + --blockdev qcow2,node-name=source-fmt,file=source-proto \ + --device virtio-blk,id=vblk,drive=source-fmt \ + --blockdev "{\"driver\": \"nbd\", + \"node-name\": \"target\", + \"server\": { + \"type\": \"unix\", + \"path\": \"$SOCK_DIR/qsd.sock\" + }, + \"export\": \"target\"}" + +h=$QEMU_HANDLE +_send_qemu_cmd $h '{"execute": "qmp_capabilities"}' 'return' + +# Use sync=top, so the first pass will not copy the whole image +_send_qemu_cmd $h \ + '{"execute": "blockdev-mirror", + "arguments": { + "job-id": "mirror", + "device": "source-fmt", + "target": "target", + "sync": "top" + }}' \ + 'return' \ + | grep -v JOB_STATUS_CHANGE # Ignore these events during creation + +# This too will be used by this and the next case +# $1: QEMU handle +# $2: Image size +wait_for_job_and_quit() { + h=$1 + size=$2 + + # List of expected events + capture_events='BLOCK_JOB_READY JOB_STATUS_CHANGE' + _wait_event $h 'BLOCK_JOB_READY' + QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before READY + + # Write something to the device for post-READY 
mirroring. Write it in + # blocks matching the cluster size, each spaced one block apart, so + # that the mirror job will have to spawn one request per cluster. + # Because the number of concurrent requests is limited (to 16), this + # limits the number of bytes concurrently in flight, which speeds up + # cancelling the job (in-flight requests still are waited for). + # To limit the number of bytes in flight, we could alternatively pass + # something for blockdev-mirror's @buf-size parameter, but + # block-commit does not have such a parameter, so we need to figure + # something out that works for both. + + cluster_size=65536 + step=$((cluster_size * 2)) + + echo '--- Writing data to the virtio-blk device ---' + + for ofs in $(seq 0 $step $((size - step))); do + qemu_io_cmd="qemu-io -d vblk/virtio-backend " + qemu_io_cmd+="\\\"aio_write $ofs $cluster_size\\\"" + + # Do not include these requests in the reference output + # (it's just too much) + silent=yes _send_qemu_cmd $h \ + "{\"execute\": \"human-monitor-command\", + \"arguments\": { + \"command-line\": \"$qemu_io_cmd\" + }}" \ + 'return' + done + + # Wait until the job's length is updated to reflect the write requests + + # We have written to half of the device, so this is the expected job length + final_len=$((size / 2)) + timeout=100 # unit: 0.1 seconds + while true; do + len=$( + _send_qemu_cmd $h \ + '{"execute": "query-block-jobs"}' \ + 'return.*"len": [0-9]\+' \ + | grep 'return.*"len": [0-9]\+' \ + | sed -e 's/.*"len": \([0-9]\+\).*/\1/' + ) + if [ "$len" -eq "$final_len" ]; then + break + fi + timeout=$((timeout - 1)) + if [ "$timeout" -eq 0 ]; then + echo "ERROR: Timeout waiting for job to reach len=$final_len" + break + fi + sleep 0.1 + done + + sleep 1 + + _send_qemu_cmd $h \ + '{"execute": "quit"}' \ + 'return' + + # List of expected events + capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN' + _wait_event $h 'SHUTDOWN' + QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came 
before SHUTDOWN + _wait_event $h 'JOB_STATUS_CHANGE' # standby + _wait_event $h 'JOB_STATUS_CHANGE' # ready + _wait_event $h 'JOB_STATUS_CHANGE' # aborting + # Filter the offset (depends on when exactly `quit` was issued) + _wait_event $h 'BLOCK_JOB_CANCELLED' \ + | sed -e 's/"offset": [0-9]\+/"offset": (filtered)/' + _wait_event $h 'JOB_STATUS_CHANGE' # concluded + _wait_event $h 'JOB_STATUS_CHANGE' # null + + wait=yes _cleanup_qemu + + kill -SIGTERM "$(cat "$TEST_DIR/qsd.pid")" +} + +wait_for_job_and_quit $h $size + +echo +echo === Start active commit to throttled QSD and exit qemu === +echo + +# Same as the above, but instead of mirroring, do an active commit + +_make_test_img $size + +set_up_throttled_qsd + +_launch_qemu \ + --blockdev "{\"driver\": \"nbd\", + \"node-name\": \"target\", + \"server\": { + \"type\": \"unix\", + \"path\": \"$SOCK_DIR/qsd.sock\" + }, + \"export\": \"target\"}" \ + --blockdev file,node-name=source-proto,filename="$TEST_IMG" \ + --blockdev qcow2,node-name=source-fmt,file=source-proto,backing=target \ + --device virtio-blk,id=vblk,drive=source-fmt + +h=$QEMU_HANDLE +_send_qemu_cmd $h '{"execute": "qmp_capabilities"}' 'return' + +_send_qemu_cmd $h \ + '{"execute": "block-commit", + "arguments": { + "job-id": "commit", + "device": "source-fmt" + }}' \ + 'return' \ + | grep -v JOB_STATUS_CHANGE # Ignore these events during creation + +wait_for_job_and_quit $h $size + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out index 754a641258..70e8dd6c87 100644 --- a/tests/qemu-iotests/185.out +++ b/tests/qemu-iotests/185.out @@ -116,4 +116,52 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": 
"disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "stream"}} No errors were found on the image. + +=== Start mirror to throttled QSD and exit qemu === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +{"execute": "qmp_capabilities"} +{"return": {}} +{"execute": "blockdev-mirror", + "arguments": { + "job-id": "mirror", + "device": "source-fmt", + "target": "target", + "sync": "top" + }} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "mirror", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} +--- Writing data to the virtio-blk device --- +{"execute": "quit"} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "mirror", "len": 33554432, "offset": (filtered), "speed": 0, "type": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "mirror"}} + +=== Start active commit to throttled QSD and exit qemu === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +{"execute": "qmp_capabilities"} +{"return": {}} +{"execute": "block-commit", + "arguments": { + "job-id": "commit", + "device": 
"source-fmt" + }} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "commit", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} +--- Writing data to the virtio-blk device --- +{"execute": "quit"} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "commit", "len": 33554432, "offset": (filtered), "speed": 0, "type": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "commit"}} *** done diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out index 50cbd8e882..aa76131ca9 100644 --- a/tests/qemu-iotests/257.out +++ b/tests/qemu-iotests/257.out @@ -106,6 +106,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -566,6 +582,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": 
false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -819,6 +851,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -1279,6 +1327,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -1532,6 +1596,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -1992,6 +2072,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -2245,6 +2341,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": 
false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -2705,6 +2817,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -2958,6 +3086,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -3418,6 +3562,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -3671,6 +3831,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -4131,6 +4307,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -4384,6 +4576,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": 
false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -4844,6 +5052,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, diff --git a/tests/qemu-iotests/271 b/tests/qemu-iotests/271 index 2775b4d130..c7c2cadda0 100755 --- a/tests/qemu-iotests/271 +++ b/tests/qemu-iotests/271 @@ -896,7 +896,7 @@ _make_test_img -o extended_l2=on 1M # Second and third writes in _concurrent_io() are independent and may finish in # different order. So, filter offset out to match both possible variants. _concurrent_io | $QEMU_IO | _filter_qemu_io | \ - $SED -e 's/\(20480\|40960\)/OFFSET/' + sed -e 's/\(20480\|40960\)/OFFSET/' _concurrent_verify | $QEMU_IO | _filter_qemu_io # success, all done diff --git a/tests/qemu-iotests/296 b/tests/qemu-iotests/296 index 099a3eeaa5..f80ef3434a 100755 --- a/tests/qemu-iotests/296 +++ b/tests/qemu-iotests/296 @@ -174,8 +174,12 @@ class EncryptionSetupTestCase(iotests.QMPTestCase): } result = vm.qmp('x-blockdev-amend', **args) - assert result['return'] == {} - vm.run_job('job0') + iotests.log(result) + # Run the job only if it was created + event = ('JOB_STATUS_CHANGE', + {'data': {'id': 'job0', 'status': 'created'}}) + if vm.events_wait([event], timeout=0.0) is not None: + vm.run_job('job0') # test that when the image opened by two qemu processes, # neither of them can update the encryption keys diff --git a/tests/qemu-iotests/296.out b/tests/qemu-iotests/296.out index 42205cc981..609826eaa0 100644 --- a/tests/qemu-iotests/296.out +++ b/tests/qemu-iotests/296.out @@ -1,11 
+1,9 @@ -{"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} -Job failed: Failed to get shared "consistent read" lock {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} -Job failed: Failed to get shared "consistent read" lock -{"execute": "job-dismiss", "arguments": {"id": "job0"}} +{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}} +{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}} {"return": {}} {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} @@ -13,14 +11,9 @@ qemu-img: Failed to get shared "consistent read" lock Is another process using the image [TEST_DIR/test.img]? . -Job failed: Block node is read-only -{"execute": "job-dismiss", "arguments": {"id": "job0"}} -{"return": {}} -Job failed: Failed to get shared "consistent read" lock -{"execute": "job-dismiss", "arguments": {"id": "job0"}} -{"return": {}} -Job failed: Failed to get shared "consistent read" lock -{"execute": "job-dismiss", "arguments": {"id": "job0"}} +{"error": {"class": "GenericError", "desc": "Block node is read-only"}} +{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}} +{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}} {"return": {}} {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index 75cc241580..21819db9c3 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -21,44 +21,44 @@ _filter_date() { - $SED -re 's/[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/yyyy-mm-dd hh:mm:ss/' + sed -Ee 's/[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/yyyy-mm-dd hh:mm:ss/' } _filter_vmstate_size() { - $SED -r -e 's/[0-9. ]{5} [KMGT]iB/ SIZE/' \ - -e 's/[0-9. ]{5} B/ SIZE/' + sed -E -e 's/[0-9. 
]{5} [KMGT]iB/ SIZE/' \ + -e 's/[0-9. ]{5} B/ SIZE/' } _filter_generated_node_ids() { - $SED -re 's/\#block[0-9]{3,}/NODE_NAME/' + sed -Ee 's/\#block[0-9]{3,}/NODE_NAME/' } _filter_qom_path() { - $SED -e '/Attached to:/s/\device[[0-9]\+\]/device[N]/g' + gsed -e '/Attached to:/s/\device[[0-9]\+\]/device[N]/g' } # replace occurrences of the actual TEST_DIR value with TEST_DIR _filter_testdir() { - $SED -e "s#$TEST_DIR/#TEST_DIR/#g" \ - -e "s#$SOCK_DIR/#SOCK_DIR/#g" \ - -e "s#SOCK_DIR/fuse-#TEST_DIR/#g" + sed -e "s#$TEST_DIR/#TEST_DIR/#g" \ + -e "s#$SOCK_DIR/#SOCK_DIR/#g" \ + -e "s#SOCK_DIR/fuse-#TEST_DIR/#g" } # replace occurrences of the actual IMGFMT value with IMGFMT _filter_imgfmt() { - $SED -e "s#$IMGFMT#IMGFMT#g" + sed -e "s#$IMGFMT#IMGFMT#g" } # Replace error message when the format is not supported and delete # the output lines after the first one _filter_qemu_img_check() { - $SED -e '/allocated.*fragmented.*compressed clusters/d' \ + gsed -e '/allocated.*fragmented.*compressed clusters/d' \ -e 's/qemu-img: This image format does not support checks/No errors were found on the image./' \ -e '/Image end offset: [0-9]\+/d' } @@ -66,13 +66,14 @@ _filter_qemu_img_check() # Removes \r from messages _filter_win32() { - $SED -e 's/\r//g' + gsed -e 's/\r//g' } # sanitize qemu-io output _filter_qemu_io() { - _filter_win32 | $SED -e "s/[0-9]* ops\; [0-9/:. sec]* ([0-9/.inf]* [EPTGMKiBbytes]*\/sec and [0-9/.inf]* ops\/sec)/X ops\; XX:XX:XX.X (XXX YYY\/sec and XXX ops\/sec)/" \ + _filter_win32 | \ + gsed -e "s/[0-9]* ops\; [0-9/:. 
sec]* ([0-9/.inf]* [EPTGMKiBbytes]*\/sec and [0-9/.inf]* ops\/sec)/X ops\; XX:XX:XX.X (XXX YYY\/sec and XXX ops\/sec)/" \ -e "s/: line [0-9][0-9]*: *[0-9][0-9]*\( Aborted\| Killed\)/:\1/" \ -e "s/qemu-io> //g" } @@ -80,7 +81,7 @@ _filter_qemu_io() # replace occurrences of QEMU_PROG with "qemu" _filter_qemu() { - $SED -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ + gsed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \ -e $'s#\r##' # QEMU monitor uses \r\n line endings } @@ -89,7 +90,7 @@ _filter_qemu() _filter_qmp() { _filter_win32 | \ - $SED -e 's#\("\(micro\)\?seconds": \)[0-9]\+#\1 TIMESTAMP#g' \ + gsed -e 's#\("\(micro\)\?seconds": \)[0-9]\+#\1 TIMESTAMP#g' \ -e 's#^{"QMP":.*}$#QMP_VERSION#' \ -e '/^ "QMP": {\s*$/, /^ }\s*$/ c\' \ -e ' QMP_VERSION' @@ -98,32 +99,32 @@ _filter_qmp() # readline makes HMP command strings so long that git complains _filter_hmp() { - $SED -e $'s/^\\((qemu) \\)\\?.*\e\\[D/\\1/g' \ + gsed -e $'s/^\\((qemu) \\)\\?.*\e\\[D/\\1/g' \ -e $'s/\e\\[K//g' } # replace block job offset _filter_block_job_offset() { - $SED -e 's/, "offset": [0-9]\+,/, "offset": OFFSET,/' + sed -e 's/, "offset": [0-9]\+,/, "offset": OFFSET,/' } # replace block job len _filter_block_job_len() { - $SED -e 's/, "len": [0-9]\+,/, "len": LEN,/g' + sed -e 's/, "len": [0-9]\+,/, "len": LEN,/g' } # replace actual image size (depends on the host filesystem) _filter_actual_image_size() { - $SED -s 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' + gsed -s 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g' } # Filename filters for qemu-img create _filter_img_create_filenames() { - $SED \ + sed \ -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -e "s#$TEST_DIR#TEST_DIR#g" \ @@ -141,7 +142,7 @@ _do_filter_img_create() # precedes ", fmt=") and the options part ($options, which starts # with "fmt=") # (And just echo everything before the first "^Formatting") - 
readarray formatting_line < <($SED -e 's/, fmt=/\n/') + readarray formatting_line < <(gsed -e 's/, fmt=/\n/') filename_part=${formatting_line[0]} unset formatting_line[0] @@ -168,11 +169,11 @@ _do_filter_img_create() options=$( echo "$options" \ | tr '\n' '\0' \ - | $SED -e 's/ \([a-z0-9_.-]*\)=/\n\1=/g' \ + | gsed -e 's/ \([a-z0-9_.-]*\)=/\n\1=/g' \ | grep -a -e '^fmt' -e '^size' -e '^backing' -e '^preallocation' \ -e '^encryption' "${grep_data_file[@]}" \ | _filter_img_create_filenames \ - | $SED \ + | sed \ -e 's/^\(fmt\)/0-\1/' \ -e 's/^\(size\)/1-\1/' \ -e 's/^\(backing\)/2-\1/' \ @@ -180,9 +181,9 @@ _do_filter_img_create() -e 's/^\(encryption\)/4-\1/' \ -e 's/^\(preallocation\)/8-\1/' \ | LC_ALL=C sort \ - | $SED -e 's/^[0-9]-//' \ + | sed -e 's/^[0-9]-//' \ | tr '\n\0' ' \n' \ - | $SED -e 's/^ *$//' -e 's/ *$//' + | sed -e 's/^ *$//' -e 's/ *$//' ) if [ -n "$options" ]; then @@ -208,7 +209,7 @@ _filter_img_create() _filter_img_create_size() { - $SED -e "s# size=[0-9]\\+# size=SIZE#g" + gsed -e "s# size=[0-9]\\+# size=SIZE#g" } _filter_img_info() @@ -222,7 +223,7 @@ _filter_img_info() discard=0 regex_json_spec_start='^ *"format-specific": \{' - $SED -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ + gsed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -e "s#$TEST_DIR#TEST_DIR#g" \ -e "s#$SOCK_DIR#SOCK_DIR#g" \ @@ -284,7 +285,7 @@ _filter_qemu_img_map() data_file_filter=(-e "s#$data_file_pattern#\\1#") fi - $SED -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \ + sed -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \ -e 's/"offset": [0-9]\+/"offset": OFFSET/g' \ -e 's/Mapped to *//' \ "${data_file_filter[@]}" \ @@ -298,7 +299,7 @@ _filter_nbd() # receive callbacks sometimes, making them unreliable. # # Filter out the TCP port number since this changes between runs. 
- $SED -e '/nbd\/.*\.c:/d' \ + sed -e '/nbd\/.*\.c:/d' \ -e 's#127\.0\.0\.1:[0-9]*#127.0.0.1:PORT#g' \ -e "s#?socket=$SOCK_DIR#?socket=SOCK_DIR#g" \ -e 's#\(foo\|PORT/\?\|.sock\): Failed to .*$#\1#' @@ -335,14 +336,14 @@ sys.stdout.write(result)' _filter_authz_check_tls() { - $SED -e 's/TLS x509 authz check for .* is denied/TLS x509 authz check for DISTINGUISHED-NAME is denied/' + sed -e 's/TLS x509 authz check for .* is denied/TLS x509 authz check for DISTINGUISHED-NAME is denied/' } _filter_qcow2_compression_type_bit() { - $SED -e 's/\(incompatible_features\s\+\)\[3\(, \)\?/\1[/' \ - -e 's/\(incompatible_features.*\), 3\]/\1]/' \ - -e 's/\(incompatible_features.*\), 3\(,.*\)/\1\2/' + gsed -e 's/\(incompatible_features\s\+\)\[3\(, \)\?/\1[/' \ + -e 's/\(incompatible_features.*\), 3\]/\1]/' \ + -e 's/\(incompatible_features.*\), 3\(,.*\)/\1\2/' } # make sure this script returns success diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 9885030b43..227e0a5be9 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -17,17 +17,28 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # -SED= -for sed in sed gsed; do - ($sed --version | grep 'GNU sed') > /dev/null 2>&1 - if [ "$?" -eq 0 ]; then - SED=$sed - break - fi -done -if [ -z "$SED" ]; then - echo "$0: GNU sed not found" - exit 1 +# bail out, setting up .notrun file +_notrun() +{ + echo "$*" >"$TEST_DIR/$seq.notrun" + echo "$seq not run: $*" + status=0 + exit +} + +if ! 
command -v gsed >/dev/null 2>&1; then + if sed --version 2>&1 | grep -v 'not GNU sed' | grep 'GNU sed' > /dev/null; + then + gsed() + { + sed "$@" + } + else + gsed() + { + _notrun "GNU sed not available" + } + fi fi dd() @@ -722,30 +733,20 @@ _img_info() done } -# bail out, setting up .notrun file -# -_notrun() -{ - echo "$*" >"$OUTPUT_DIR/$seq.notrun" - echo "$seq not run: $*" - status=0 - exit -} - # bail out, setting up .casenotrun file # The function _casenotrun() is used as a notifier. It is the # caller's responsibility to make skipped a particular test. # _casenotrun() { - echo " [case not run] $*" >>"$OUTPUT_DIR/$seq.casenotrun" + echo " [case not run] $*" >>"$TEST_DIR/$seq.casenotrun" } # just plain bail out # _fail() { - echo "$*" | tee -a "$OUTPUT_DIR/$seq.full" + echo "$*" | tee -a "$TEST_DIR/$seq.full" echo "(see $seq.full for details)" status=1 exit 1 @@ -920,7 +921,7 @@ _require_working_luks() IMGFMT='luks' _rm_test_img "$file" if [ $status != 0 ]; then - reason=$(echo "$output" | grep "$file:" | $SED -e "s#.*$file: *##") + reason=$(echo "$output" | grep "$file:" | sed -e "s#.*$file: *##") if [ -z "$reason" ]; then reason="Failed to create a LUKS image" fi diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 6ba65eb1ff..508adade9e 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -39,6 +39,7 @@ from contextlib import contextmanager from qemu.machine import qtest from qemu.qmp import QMPMessage +from qemu.aqmp.legacy import QEMUMonitorProtocol # Use this logger for logging messages directly from the iotests module logger = logging.getLogger('qemu.iotests') @@ -84,7 +85,6 @@ qemu_print = os.environ.get('PRINT_QEMU', False) imgfmt = os.environ.get('IMGFMT', 'raw') imgproto = os.environ.get('IMGPROTO', 'file') -output_dir = os.environ.get('OUTPUT_DIR', '.') try: test_dir = os.environ['TEST_DIR'] @@ -278,6 +278,9 @@ def qemu_io(*args): '''Run qemu-io and return the stdout data''' return 
qemu_tool_pipe_and_status('qemu-io', qemu_io_wrap_args(args))[0] +def qemu_io_pipe_and_status(*args): + return qemu_tool_pipe_and_status('qemu-io', qemu_io_wrap_args(args)) + def qemu_io_log(*args): result = qemu_io(*args) log(result, filters=[filter_testfiles, filter_qemu_io]) @@ -348,14 +351,30 @@ class QemuIoInteractive: class QemuStorageDaemon: - def __init__(self, *args: str, instance_id: str = 'a'): + _qmp: Optional[QEMUMonitorProtocol] = None + _qmpsock: Optional[str] = None + # Python < 3.8 would complain if this type were not a string literal + # (importing `annotations` from `__future__` would work; but not on <= 3.6) + _p: 'Optional[subprocess.Popen[bytes]]' = None + + def __init__(self, *args: str, instance_id: str = 'a', qmp: bool = False): assert '--pidfile' not in args self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid') all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile] + if qmp: + self._qmpsock = os.path.join(sock_dir, f'qsd-{instance_id}.sock') + all_args += ['--chardev', + f'socket,id=qmp-sock,path={self._qmpsock}', + '--monitor', 'qmp-sock'] + + self._qmp = QEMUMonitorProtocol(self._qmpsock, server=True) + # Cannot use with here, we want the subprocess to stay around # pylint: disable=consider-using-with self._p = subprocess.Popen(all_args) + if self._qmp is not None: + self._qmp.accept() while not os.path.exists(self.pidfile): if self._p.poll() is not None: cmd = ' '.join(all_args) @@ -370,11 +389,24 @@ class QemuStorageDaemon: assert self._pid == self._p.pid + def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \ + -> QMPMessage: + assert self._qmp is not None + return self._qmp.cmd(cmd, args) + def stop(self, kill_signal=15): self._p.send_signal(kill_signal) self._p.wait() self._p = None + if self._qmp: + self._qmp.close() + + if self._qmpsock is not None: + try: + os.remove(self._qmpsock) + except OSError: + pass try: os.remove(self.pidfile) except OSError: @@ -1209,7 +1241,7 @@ def notrun(reason): # 
Each test in qemu-iotests has a number ("seq") seq = os.path.basename(sys.argv[0]) - with open('%s/%s.notrun' % (output_dir, seq), 'w', encoding='utf-8') \ + with open('%s/%s.notrun' % (test_dir, seq), 'w', encoding='utf-8') \ as outfile: outfile.write(reason + '\n') logger.warning("%s not run: %s", seq, reason) @@ -1224,7 +1256,7 @@ def case_notrun(reason): # Each test in qemu-iotests has a number ("seq") seq = os.path.basename(sys.argv[0]) - with open('%s/%s.casenotrun' % (output_dir, seq), 'a', encoding='utf-8') \ + with open('%s/%s.casenotrun' % (test_dir, seq), 'a', encoding='utf-8') \ as outfile: outfile.write(' [case not run] ' + reason + '\n') diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py index 0f32897fe8..b11e943c8a 100644 --- a/tests/qemu-iotests/testenv.py +++ b/tests/qemu-iotests/testenv.py @@ -66,7 +66,7 @@ class TestEnv(ContextManager['TestEnv']): # pylint: disable=too-many-instance-attributes env_variables = ['PYTHONPATH', 'TEST_DIR', 'SOCK_DIR', 'SAMPLE_IMG_DIR', - 'OUTPUT_DIR', 'PYTHON', 'QEMU_PROG', 'QEMU_IMG_PROG', + 'PYTHON', 'QEMU_PROG', 'QEMU_IMG_PROG', 'QEMU_IO_PROG', 'QEMU_NBD_PROG', 'QSD_PROG', 'QEMU_OPTIONS', 'QEMU_IMG_OPTIONS', 'QEMU_IO_OPTIONS', 'QEMU_IO_OPTIONS_NO_FMT', @@ -106,7 +106,6 @@ class TestEnv(ContextManager['TestEnv']): TEST_DIR SOCK_DIR SAMPLE_IMG_DIR - OUTPUT_DIR """ # Path where qemu goodies live in this source tree. 
@@ -134,8 +133,6 @@ class TestEnv(ContextManager['TestEnv']): os.path.join(self.source_iotests, 'sample_images')) - self.output_dir = os.getcwd() # OUTPUT_DIR - def init_binaries(self) -> None: """Init binary path variables: PYTHON (for bash tests) diff --git a/tests/qemu-iotests/testrunner.py b/tests/qemu-iotests/testrunner.py index 9a94273975..41083ff9c6 100644 --- a/tests/qemu-iotests/testrunner.py +++ b/tests/qemu-iotests/testrunner.py @@ -259,9 +259,6 @@ class TestRunner(ContextManager['TestRunner']): """ f_test = Path(test) - f_bad = Path(f_test.name + '.out.bad') - f_notrun = Path(f_test.name + '.notrun') - f_casenotrun = Path(f_test.name + '.casenotrun') f_reference = Path(self.find_reference(test)) if not f_test.exists(): @@ -276,9 +273,6 @@ class TestRunner(ContextManager['TestRunner']): description='No qualified output ' f'(expected {f_reference})') - for p in (f_bad, f_notrun, f_casenotrun): - silent_unlink(p) - args = [str(f_test.resolve())] env = self.env.prepare_subprocess(args) if mp: @@ -288,6 +282,14 @@ class TestRunner(ContextManager['TestRunner']): env[d] = os.path.join(env[d], f_test.name) Path(env[d]).mkdir(parents=True, exist_ok=True) + test_dir = env['TEST_DIR'] + f_bad = Path(test_dir, f_test.name + '.out.bad') + f_notrun = Path(test_dir, f_test.name + '.notrun') + f_casenotrun = Path(test_dir, f_test.name + '.casenotrun') + + for p in (f_notrun, f_casenotrun): + silent_unlink(p) + t0 = time.time() with f_bad.open('w', encoding="utf-8") as f: with subprocess.Popen(args, cwd=str(f_test.parent), env=env, @@ -365,7 +367,10 @@ class TestRunner(ContextManager['TestRunner']): description=res.description) if res.casenotrun: - print(res.casenotrun) + if self.tap: + print('#' + res.casenotrun.replace('\n', '\n#')) + else: + print(res.casenotrun) return res diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io new file mode 100755 index 0000000000..567e8cf21e --- /dev/null +++ 
b/tests/qemu-iotests/tests/graph-changes-while-io @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# group: rw +# +# Test graph changes while I/O is happening +# +# Copyright (C) 2022 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os +from threading import Thread +import iotests +from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ + QemuStorageDaemon + + +top = os.path.join(iotests.test_dir, 'top.img') +nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') + + +def do_qemu_img_bench() -> None: + """ + Do some I/O requests on `nbd_sock`. 
+ """ + assert qemu_img('bench', '-f', 'raw', '-c', '2000000', + f'nbd+unix:///node0?socket={nbd_sock}') == 0 + + +class TestGraphChangesWhileIO(QMPTestCase): + def setUp(self) -> None: + # Create an overlay that can be added at runtime on top of the + # null-co block node that will receive I/O + assert qemu_img_create('-f', imgfmt, '-F', 'raw', '-b', 'null-co://', + top) == 0 + + # QSD instance with a null-co block node in an I/O thread, + # exported over NBD (on `nbd_sock`, export name "node0") + self.qsd = QemuStorageDaemon( + '--object', 'iothread,id=iothread0', + '--blockdev', 'null-co,node-name=node0,read-zeroes=true', + '--nbd-server', f'addr.type=unix,addr.path={nbd_sock}', + '--export', 'nbd,id=exp0,node-name=node0,iothread=iothread0,' + + 'fixed-iothread=true,writable=true', + qmp=True + ) + + def tearDown(self) -> None: + self.qsd.stop() + + def test_blockdev_add_while_io(self) -> None: + # Run qemu-img bench in the background + bench_thr = Thread(target=do_qemu_img_bench) + bench_thr.start() + + # While qemu-img bench is running, repeatedly add and remove an + # overlay to/from node0 + while bench_thr.is_alive(): + result = self.qsd.qmp('blockdev-add', { + 'driver': imgfmt, + 'node-name': 'overlay', + 'backing': 'node0', + 'file': { + 'driver': 'file', + 'filename': top + } + }) + self.assert_qmp(result, 'return', {}) + + result = self.qsd.qmp('blockdev-del', { + 'node-name': 'overlay' + }) + self.assert_qmp(result, 'return', {}) + + bench_thr.join() + +if __name__ == '__main__': + # Format must support raw backing files + iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], + supported_protocols=['file']) diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out new file mode 100644 index 0000000000..ae1213e6f8 --- /dev/null +++ b/tests/qemu-iotests/tests/graph-changes-while-io.out @@ -0,0 +1,5 @@ +. 
+---------------------------------------------------------------------- +Ran 1 tests + +OK diff --git a/tests/qemu-iotests/tests/image-fleecing b/tests/qemu-iotests/tests/image-fleecing index a58b5a1781..c56278639c 100755 --- a/tests/qemu-iotests/tests/image-fleecing +++ b/tests/qemu-iotests/tests/image-fleecing @@ -23,12 +23,14 @@ # Creator/Owner: John Snow <jsnow@redhat.com> import iotests -from iotests import log, qemu_img, qemu_io, qemu_io_silent +from iotests import log, qemu_img, qemu_io, qemu_io_silent, \ + qemu_io_pipe_and_status iotests.script_initialize( - supported_fmts=['qcow2', 'qcow', 'qed', 'vmdk', 'vhdx', 'raw'], + supported_fmts=['qcow2'], supported_platforms=['linux'], required_fmts=['copy-before-write'], + unsupported_imgopts=['compat'] ) patterns = [('0x5d', '0', '64k'), @@ -49,12 +51,30 @@ remainder = [('0xd5', '0x108000', '32k'), # Right-end of partial-left [1] ('0xdc', '32M', '32k'), # Left-end of partial-right [2] ('0xcd', '0x3ff0000', '64k')] # patterns[3] -def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm): +def do_test(vm, use_cbw, use_snapshot_access_filter, base_img_path, + fleece_img_path, nbd_sock_path=None, + target_img_path=None, + bitmap=False): + push_backup = target_img_path is not None + assert (nbd_sock_path is not None) != push_backup + if push_backup: + assert use_cbw + log('--- Setting up images ---') log('') assert qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M') == 0 - assert qemu_img('create', '-f', 'qcow2', fleece_img_path, '64M') == 0 + if bitmap: + assert qemu_img('bitmap', '--add', base_img_path, 'bitmap0') == 0 + + if use_snapshot_access_filter: + assert use_cbw + assert qemu_img('create', '-f', 'raw', fleece_img_path, '64M') == 0 + else: + assert qemu_img('create', '-f', 'qcow2', fleece_img_path, '64M') == 0 + + if push_backup: + assert qemu_img('create', '-f', 'qcow2', target_img_path, '64M') == 0 for p in patterns: qemu_io('-f', iotests.imgfmt, @@ -81,27 +101,46 @@ def 
do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm): log('') - # create tmp_node backed by src_node - log(vm.qmp('blockdev-add', { - 'driver': 'qcow2', - 'node-name': tmp_node, - 'file': { + if use_snapshot_access_filter: + log(vm.qmp('blockdev-add', { + 'node-name': tmp_node, 'driver': 'file', 'filename': fleece_img_path, - }, - 'backing': src_node, - })) + })) + else: + # create tmp_node backed by src_node + log(vm.qmp('blockdev-add', { + 'driver': 'qcow2', + 'node-name': tmp_node, + 'file': { + 'driver': 'file', + 'filename': fleece_img_path, + }, + 'backing': src_node, + })) # Establish CBW from source to fleecing node if use_cbw: - log(vm.qmp('blockdev-add', { + fl_cbw = { 'driver': 'copy-before-write', 'node-name': 'fl-cbw', 'file': src_node, 'target': tmp_node - })) + } + + if bitmap: + fl_cbw['bitmap'] = {'node': src_node, 'name': 'bitmap0'} + + log(vm.qmp('blockdev-add', fl_cbw)) log(vm.qmp('qom-set', path=qom_path, property='drive', value='fl-cbw')) + + if use_snapshot_access_filter: + log(vm.qmp('blockdev-add', { + 'driver': 'snapshot-access', + 'node-name': 'fl-access', + 'file': 'fl-cbw', + })) else: log(vm.qmp('blockdev-backup', job_id='fleecing', @@ -109,25 +148,47 @@ def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm): target=tmp_node, sync='none')) - log('') - log('--- Setting up NBD Export ---') - log('') + export_node = 'fl-access' if use_snapshot_access_filter else tmp_node + + if push_backup: + log('') + log('--- Starting actual backup ---') + log('') - nbd_uri = 'nbd+unix:///%s?socket=%s' % (tmp_node, nbd_sock_path) - log(vm.qmp('nbd-server-start', - {'addr': {'type': 'unix', - 'data': {'path': nbd_sock_path}}})) + log(vm.qmp('blockdev-add', **{ + 'driver': iotests.imgfmt, + 'node-name': 'target', + 'file': { + 'driver': 'file', + 'filename': target_img_path + } + })) + log(vm.qmp('blockdev-backup', device=export_node, + sync='full', target='target', + job_id='push-backup', speed=1)) + else: + log('') + 
log('--- Setting up NBD Export ---') + log('') - log(vm.qmp('nbd-server-add', device=tmp_node)) + nbd_uri = 'nbd+unix:///%s?socket=%s' % (export_node, nbd_sock_path) + log(vm.qmp('nbd-server-start', + {'addr': { 'type': 'unix', + 'data': { 'path': nbd_sock_path } } })) - log('') - log('--- Sanity Check ---') - log('') + log(vm.qmp('nbd-server-add', device=export_node)) - for p in patterns + zeroes: - cmd = 'read -P%s %s %s' % p - log(cmd) - assert qemu_io_silent('-r', '-f', 'raw', '-c', cmd, nbd_uri) == 0 + log('') + log('--- Sanity Check ---') + log('') + + for p in patterns + zeroes: + cmd = 'read -P%s %s %s' % p + log(cmd) + out, ret = qemu_io_pipe_and_status('-r', '-f', 'raw', '-c', cmd, + nbd_uri) + if ret != 0: + print(out) log('') log('--- Testing COW ---') @@ -138,6 +199,23 @@ def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm): log(cmd) log(vm.hmp_qemu_io(qom_path, cmd, qdev=True)) + if push_backup: + # Check that previous operations were done during backup, not after + # If backup is already finished, it's possible that it was finished + # even before hmp qemu_io write, and we didn't actually test + # copy-before-write operation. This should not happen, as we use + # speed=1. But worth checking. 
+ result = vm.qmp('query-block-jobs') + assert len(result['return']) == 1 + + result = vm.qmp('block-job-set-speed', device='push-backup', speed=0) + assert result == {'return': {}} + + log(vm.event_wait(name='BLOCK_JOB_COMPLETED', + match={'data': {'device': 'push-backup'}}), + filters=[iotests.filter_qmp_event]) + log(vm.qmp('blockdev-del', node_name='target')) + log('') log('--- Verifying Data ---') log('') @@ -145,13 +223,25 @@ def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm): for p in patterns + zeroes: cmd = 'read -P%s %s %s' % p log(cmd) - assert qemu_io_silent('-r', '-f', 'raw', '-c', cmd, nbd_uri) == 0 + args = ['-r', '-c', cmd] + if push_backup: + args += [target_img_path] + else: + args += ['-f', 'raw', nbd_uri] + out, ret = qemu_io_pipe_and_status(*args) + if ret != 0: + print(out) log('') log('--- Cleanup ---') log('') + if not push_backup: + log(vm.qmp('nbd-server-stop')) + if use_cbw: + if use_snapshot_access_filter: + log(vm.qmp('blockdev-del', node_name='fl-access')) log(vm.qmp('qom-set', path=qom_path, property='drive', value=src_node)) log(vm.qmp('blockdev-del', node_name='fl-cbw')) else: @@ -160,7 +250,6 @@ def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm): assert e is not None log(e, filters=[iotests.filter_qmp_event]) - log(vm.qmp('nbd-server-stop')) log(vm.qmp('blockdev-del', node_name=tmp_node)) vm.shutdown() @@ -177,17 +266,37 @@ def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm): log('Done') -def test(use_cbw): +def test(use_cbw, use_snapshot_access_filter, + nbd_sock_path=None, target_img_path=None, bitmap=False): with iotests.FilePath('base.img') as base_img_path, \ iotests.FilePath('fleece.img') as fleece_img_path, \ - iotests.FilePath('nbd.sock', - base_dir=iotests.sock_dir) as nbd_sock_path, \ iotests.VM() as vm: - do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm) + do_test(vm, use_cbw, use_snapshot_access_filter, base_img_path, + fleece_img_path, 
nbd_sock_path, target_img_path, + bitmap=bitmap) + +def test_pull(use_cbw, use_snapshot_access_filter, bitmap=False): + with iotests.FilePath('nbd.sock', + base_dir=iotests.sock_dir) as nbd_sock_path: + test(use_cbw, use_snapshot_access_filter, nbd_sock_path, None, + bitmap=bitmap) + +def test_push(): + with iotests.FilePath('target.img') as target_img_path: + test(True, True, None, target_img_path) log('=== Test backup(sync=none) based fleecing ===\n') -test(False) +test_pull(False, False) + +log('=== Test cbw-filter based fleecing ===\n') +test_pull(True, False) + +log('=== Test fleecing-format based fleecing ===\n') +test_pull(True, True) + +log('=== Test fleecing-format based fleecing with bitmap ===\n') +test_pull(True, True, bitmap=True) -log('=== Test filter based fleecing ===\n') -test(True) +log('=== Test push backup with fleecing ===\n') +test_push() diff --git a/tests/qemu-iotests/tests/image-fleecing.out b/tests/qemu-iotests/tests/image-fleecing.out index e96d122a8b..acfc89ff0e 100644 --- a/tests/qemu-iotests/tests/image-fleecing.out +++ b/tests/qemu-iotests/tests/image-fleecing.out @@ -52,8 +52,150 @@ read -P0 0x3fe0000 64k --- Cleanup --- {"return": {}} +{"return": {}} {"data": {"device": "fleecing", "len": 67108864, "offset": 393216, "speed": 0, "type": "backup"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} {"return": {}} + +--- Confirming writes --- + +read -P0xab 0 64k +read -P0xad 0x00f8000 64k +read -P0x1d 0x2008000 64k +read -P0xea 0x3fe0000 64k +read -P0xd5 0x108000 32k +read -P0xdc 32M 32k +read -P0xcd 0x3ff0000 64k + +Done +=== Test cbw-filter based fleecing === + +--- Setting up images --- + +Done + +--- Launching VM --- + +Done + +--- Setting up Fleecing Graph --- + +{"return": {}} +{"return": {}} +{"return": {}} + +--- Setting up NBD Export --- + +{"return": {}} +{"return": {}} + +--- Sanity Check --- + +read -P0x5d 0 64k +read -P0xd5 1M 64k +read -P0xdc 32M 64k +read -P0xcd 0x3ff0000 64k 
+read -P0 0x00f8000 32k +read -P0 0x2010000 32k +read -P0 0x3fe0000 64k + +--- Testing COW --- + +write -P0xab 0 64k +{"return": ""} +write -P0xad 0x00f8000 64k +{"return": ""} +write -P0x1d 0x2008000 64k +{"return": ""} +write -P0xea 0x3fe0000 64k +{"return": ""} + +--- Verifying Data --- + +read -P0x5d 0 64k +read -P0xd5 1M 64k +read -P0xdc 32M 64k +read -P0xcd 0x3ff0000 64k +read -P0 0x00f8000 32k +read -P0 0x2010000 32k +read -P0 0x3fe0000 64k + +--- Cleanup --- + +{"return": {}} +{"return": {}} +{"return": {}} +{"return": {}} + +--- Confirming writes --- + +read -P0xab 0 64k +read -P0xad 0x00f8000 64k +read -P0x1d 0x2008000 64k +read -P0xea 0x3fe0000 64k +read -P0xd5 0x108000 32k +read -P0xdc 32M 32k +read -P0xcd 0x3ff0000 64k + +Done +=== Test fleecing-format based fleecing === + +--- Setting up images --- + +Done + +--- Launching VM --- + +Done + +--- Setting up Fleecing Graph --- + +{"return": {}} +{"return": {}} +{"return": {}} +{"return": {}} + +--- Setting up NBD Export --- + +{"return": {}} +{"return": {}} + +--- Sanity Check --- + +read -P0x5d 0 64k +read -P0xd5 1M 64k +read -P0xdc 32M 64k +read -P0xcd 0x3ff0000 64k +read -P0 0x00f8000 32k +read -P0 0x2010000 32k +read -P0 0x3fe0000 64k + +--- Testing COW --- + +write -P0xab 0 64k +{"return": ""} +write -P0xad 0x00f8000 64k +{"return": ""} +write -P0x1d 0x2008000 64k +{"return": ""} +write -P0xea 0x3fe0000 64k +{"return": ""} + +--- Verifying Data --- + +read -P0x5d 0 64k +read -P0xd5 1M 64k +read -P0xdc 32M 64k +read -P0xcd 0x3ff0000 64k +read -P0 0x00f8000 32k +read -P0 0x2010000 32k +read -P0 0x3fe0000 64k + +--- Cleanup --- + +{"return": {}} +{"return": {}} +{"return": {}} +{"return": {}} {"return": {}} --- Confirming writes --- @@ -67,7 +209,7 @@ read -P0xdc 32M 32k read -P0xcd 0x3ff0000 64k Done -=== Test filter based fleecing === +=== Test fleecing-format based fleecing with bitmap === --- Setting up images --- @@ -82,6 +224,7 @@ Done {"return": {}} {"return": {}} {"return": {}} +{"return": {}} 
--- Setting up NBD Export --- @@ -95,8 +238,82 @@ read -P0xd5 1M 64k read -P0xdc 32M 64k read -P0xcd 0x3ff0000 64k read -P0 0x00f8000 32k +read failed: Invalid argument + +read -P0 0x2010000 32k +read failed: Invalid argument + +read -P0 0x3fe0000 64k +read failed: Invalid argument + + +--- Testing COW --- + +write -P0xab 0 64k +{"return": ""} +write -P0xad 0x00f8000 64k +{"return": ""} +write -P0x1d 0x2008000 64k +{"return": ""} +write -P0xea 0x3fe0000 64k +{"return": ""} + +--- Verifying Data --- + +read -P0x5d 0 64k +read -P0xd5 1M 64k +read -P0xdc 32M 64k +read -P0xcd 0x3ff0000 64k +read -P0 0x00f8000 32k +read failed: Invalid argument + read -P0 0x2010000 32k +read failed: Invalid argument + read -P0 0x3fe0000 64k +read failed: Invalid argument + + +--- Cleanup --- + +{"return": {}} +{"return": {}} +{"return": {}} +{"return": {}} +{"return": {}} + +--- Confirming writes --- + +read -P0xab 0 64k +read -P0xad 0x00f8000 64k +read -P0x1d 0x2008000 64k +read -P0xea 0x3fe0000 64k +read -P0xd5 0x108000 32k +read -P0xdc 32M 32k +read -P0xcd 0x3ff0000 64k + +Done +=== Test push backup with fleecing === + +--- Setting up images --- + +Done + +--- Launching VM --- + +Done + +--- Setting up Fleecing Graph --- + +{"return": {}} +{"return": {}} +{"return": {}} +{"return": {}} + +--- Starting actual backup --- + +{"return": {}} +{"return": {}} --- Testing COW --- @@ -108,6 +325,8 @@ write -P0x1d 0x2008000 64k {"return": ""} write -P0xea 0x3fe0000 64k {"return": ""} +{"data": {"device": "push-backup", "len": 67108864, "offset": 67108864, "speed": 0, "type": "backup"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"return": {}} --- Verifying Data --- diff --git a/tests/qtest/virtio-9p-test.c b/tests/qtest/virtio-9p-test.c index 502e5ad0c7..01ca076afe 100644 --- a/tests/qtest/virtio-9p-test.c +++ b/tests/qtest/virtio-9p-test.c @@ -1253,7 +1253,7 @@ static void fs_unlinkat_dir(void *obj, void *data, QGuestAllocator *t_alloc) /* ... 
and is actually a directory */ g_assert((st.st_mode & S_IFMT) == S_IFDIR); - do_unlinkat(v9p, "/", "02", AT_REMOVEDIR); + do_unlinkat(v9p, "/", "02", P9_DOTL_AT_REMOVEDIR); /* directory should be gone now */ g_assert(stat(new_dir, &st) != 0); } diff --git a/tests/tcg/configure.sh b/tests/tcg/configure.sh index 0663bd19f4..ed4b5ccb1f 100755 --- a/tests/tcg/configure.sh +++ b/tests/tcg/configure.sh @@ -64,9 +64,9 @@ fi : ${cross_cc_ppc="powerpc-linux-gnu-gcc"} : ${cross_cc_cflags_ppc="-m32"} : ${cross_cc_ppc64="powerpc64-linux-gnu-gcc"} -: ${cross_cc_cflags_ppc64="-m64 -mbig"} +: ${cross_cc_cflags_ppc64="-m64 -mbig-endian"} : ${cross_cc_ppc64le="$cross_cc_ppc64"} -: ${cross_cc_cflags_ppc64le="-m64 -mlittle"} +: ${cross_cc_cflags_ppc64le="-m64 -mlittle-endian"} : ${cross_cc_riscv64="riscv64-linux-gnu-gcc"} : ${cross_cc_s390x="s390x-linux-gnu-gcc"} : ${cross_cc_sh4="sh4-linux-gnu-gcc"} diff --git a/tests/tcg/ppc64le/bcdsub.c b/tests/tcg/ppc64le/bcdsub.c index 8c188cae6d..87c8c44a44 100644 --- a/tests/tcg/ppc64le/bcdsub.c +++ b/tests/tcg/ppc64le/bcdsub.c @@ -1,6 +1,7 @@ #include <assert.h> #include <unistd.h> #include <signal.h> +#include <stdint.h> #define CRF_LT (1 << 3) #define CRF_GT (1 << 2) @@ -8,24 +9,50 @@ #define CRF_SO (1 << 0) #define UNDEF 0 -#define BCDSUB(vra, vrb, ps) \ - asm ("bcdsub. %1,%2,%3,%4;" \ - "mfocrf %0,0b10;" \ - : "=r" (cr), "=v" (vrt) \ - : "v" (vra), "v" (vrb), "i" (ps) \ - : ); - -#define TEST(vra, vrb, ps, exp_res, exp_cr6) \ - do { \ - __int128 vrt = 0; \ - int cr = 0; \ - BCDSUB(vra, vrb, ps); \ - if (exp_res) \ - assert(vrt == exp_res); \ - assert((cr >> 4) == exp_cr6); \ +#ifdef __has_builtin +#if !__has_builtin(__builtin_bcdsub) +#define NO_BUILTIN_BCDSUB +#endif +#endif + +#ifdef NO_BUILTIN_BCDSUB +#define BCDSUB(T, A, B, PS) \ + ".long 4 << 26 | (" #T ") << 21 | (" #A ") << 16 | (" #B ") << 11" \ + " | 1 << 10 | (" #PS ") << 9 | 65\n\t" +#else +#define BCDSUB(T, A, B, PS) "bcdsub. 
" #T ", " #A ", " #B ", " #PS "\n\t" +#endif + +#define TEST(AH, AL, BH, BL, PS, TH, TL, CR6) \ + do { \ + int cr = 0; \ + uint64_t th, tl; \ + /* \ + * Use GPR pairs to load the VSR values and place the resulting VSR and\ + * CR6 in th, tl, and cr. Note that we avoid newer instructions (e.g., \ + * mtvsrdd/mfvsrld) so we can run this test on POWER8 machines. \ + */ \ + asm ("mtvsrd 32, %3\n\t" \ + "mtvsrd 33, %4\n\t" \ + "xxmrghd 32, 32, 33\n\t" \ + "mtvsrd 33, %5\n\t" \ + "mtvsrd 34, %6\n\t" \ + "xxmrghd 33, 33, 34\n\t" \ + BCDSUB(0, 0, 1, PS) \ + "mfocrf %0, 0b10\n\t" \ + "mfvsrd %1, 32\n\t" \ + "xxswapd 32, 32\n\t" \ + "mfvsrd %2, 32\n\t" \ + : "=r" (cr), "=r" (th), "=r" (tl) \ + : "r" (AH), "r" (AL), "r" (BH), "r" (BL) \ + : "v0", "v1", "v2"); \ + if (TH != UNDEF || TL != UNDEF) { \ + assert(tl == TL); \ + assert(th == TH); \ + } \ + assert((cr >> 4) == CR6); \ } while (0) - /* * Unbounded result is equal to zero: * sign = (PS) ? 0b1111 : 0b1100 @@ -33,13 +60,13 @@ */ void test_bcdsub_eq(void) { - __int128 a, b; - /* maximum positive BCD value */ - a = b = (((__int128) 0x9999999999999999) << 64 | 0x999999999999999c); - - TEST(a, b, 0, 0xc, CRF_EQ); - TEST(a, b, 1, 0xf, CRF_EQ); + TEST(0x9999999999999999, 0x999999999999999c, + 0x9999999999999999, 0x999999999999999c, + 0, 0x0, 0xc, CRF_EQ); + TEST(0x9999999999999999, 0x999999999999999c, + 0x9999999999999999, 0x999999999999999c, + 1, 0x0, 0xf, CRF_EQ); } /* @@ -49,21 +76,16 @@ void test_bcdsub_eq(void) */ void test_bcdsub_gt(void) { - __int128 a, b, c; - - /* maximum positive BCD value */ - a = (((__int128) 0x9999999999999999) << 64 | 0x999999999999999c); - - /* negative one BCD value */ - b = (__int128) 0x1d; - - TEST(a, b, 0, 0xc, (CRF_GT | CRF_SO)); - TEST(a, b, 1, 0xf, (CRF_GT | CRF_SO)); - - c = (((__int128) 0x9999999999999999) << 64 | 0x999999999999998c); - - TEST(c, b, 0, a, CRF_GT); - TEST(c, b, 1, (a | 0x3), CRF_GT); + /* maximum positive and negative one BCD values */ + TEST(0x9999999999999999, 
0x999999999999999c, 0x0, 0x1d, 0, + 0x0, 0xc, (CRF_GT | CRF_SO)); + TEST(0x9999999999999999, 0x999999999999999c, 0x0, 0x1d, 1, + 0x0, 0xf, (CRF_GT | CRF_SO)); + + TEST(0x9999999999999999, 0x999999999999998c, 0x0, 0x1d, 0, + 0x9999999999999999, 0x999999999999999c, CRF_GT); + TEST(0x9999999999999999, 0x999999999999998c, 0x0, 0x1d, 1, + 0x9999999999999999, 0x999999999999999f, CRF_GT); } /* @@ -73,45 +95,27 @@ void test_bcdsub_gt(void) */ void test_bcdsub_lt(void) { - __int128 a, b; - - /* positive zero BCD value */ - a = (__int128) 0xc; - - /* positive one BCD value */ - b = (__int128) 0x1c; - - TEST(a, b, 0, 0x1d, CRF_LT); - TEST(a, b, 1, 0x1d, CRF_LT); - - /* maximum negative BCD value */ - a = (((__int128) 0x9999999999999999) << 64 | 0x999999999999999d); - - /* positive one BCD value */ - b = (__int128) 0x1c; - - TEST(a, b, 0, 0xd, (CRF_LT | CRF_SO)); - TEST(a, b, 1, 0xd, (CRF_LT | CRF_SO)); + /* positive zero and positive one BCD values */ + TEST(0x0, 0xc, 0x0, 0x1c, 0, 0x0, 0x1d, CRF_LT); + TEST(0x0, 0xc, 0x0, 0x1c, 1, 0x0, 0x1d, CRF_LT); + + /* maximum negative and positive one BCD values */ + TEST(0x9999999999999999, 0x999999999999999d, 0x0, 0x1c, 0, + 0x0, 0xd, (CRF_LT | CRF_SO)); + TEST(0x9999999999999999, 0x999999999999999d, 0x0, 0x1c, 1, + 0x0, 0xd, (CRF_LT | CRF_SO)); } void test_bcdsub_invalid(void) { - __int128 a, b; - - /* positive one BCD value */ - a = (__int128) 0x1c; - b = 0xf00; - - TEST(a, b, 0, UNDEF, CRF_SO); - TEST(a, b, 1, UNDEF, CRF_SO); - - TEST(b, a, 0, UNDEF, CRF_SO); - TEST(b, a, 1, UNDEF, CRF_SO); + TEST(0x0, 0x1c, 0x0, 0xf00, 0, UNDEF, UNDEF, CRF_SO); + TEST(0x0, 0x1c, 0x0, 0xf00, 1, UNDEF, UNDEF, CRF_SO); - a = 0xbad; + TEST(0x0, 0xf00, 0x0, 0x1c, 0, UNDEF, UNDEF, CRF_SO); + TEST(0x0, 0xf00, 0x0, 0x1c, 1, UNDEF, UNDEF, CRF_SO); - TEST(a, b, 0, UNDEF, CRF_SO); - TEST(a, b, 1, UNDEF, CRF_SO); + TEST(0x0, 0xbad, 0x0, 0xf00, 0, UNDEF, UNDEF, CRF_SO); + TEST(0x0, 0xbad, 0x0, 0xf00, 1, UNDEF, UNDEF, CRF_SO); } int main(void) diff --git 
a/tests/tcg/ppc64le/mtfsf.c b/tests/tcg/ppc64le/mtfsf.c index b3d31f3637..bed5b1afa4 100644 --- a/tests/tcg/ppc64le/mtfsf.c +++ b/tests/tcg/ppc64le/mtfsf.c @@ -1,8 +1,12 @@ #include <stdlib.h> +#include <stdint.h> #include <assert.h> #include <signal.h> #include <sys/prctl.h> +#define MTFSF(FLM, FRB) asm volatile ("mtfsf %0, %1" :: "i" (FLM), "f" (FRB)) +#define MFFS(FRT) asm("mffs %0" : "=f" (FRT)) + #define FPSCR_VE 7 /* Floating-point invalid operation exception enable */ #define FPSCR_VXSOFT 10 /* Floating-point invalid operation exception (soft) */ #define FPSCR_FI 17 /* Floating-point fraction inexact */ @@ -21,10 +25,7 @@ void sigfpe_handler(int sig, siginfo_t *si, void *ucontext) int main(void) { - union { - double d; - long long ll; - } fpscr; + uint64_t fpscr; struct sigaction sa = { .sa_sigaction = sigfpe_handler, @@ -40,10 +41,9 @@ int main(void) prctl(PR_SET_FPEXC, PR_FP_EXC_PRECISE); /* First test if the FI bit is being set correctly */ - fpscr.ll = FP_FI; - __builtin_mtfsf(0b11111111, fpscr.d); - fpscr.d = __builtin_mffs(); - assert((fpscr.ll & FP_FI) != 0); + MTFSF(0b11111111, FP_FI); + MFFS(fpscr); + assert((fpscr & FP_FI) != 0); /* Then test if the deferred exception is being called correctly */ sigaction(SIGFPE, &sa, NULL); @@ -54,8 +54,7 @@ int main(void) * But if a different exception is chosen si_code check should * change accordingly. 
*/ - fpscr.ll = FP_VE | FP_VXSOFT; - __builtin_mtfsf(0b11111111, fpscr.d); + MTFSF(0b11111111, FP_VE | FP_VXSOFT); return 1; } diff --git a/tests/tcg/ppc64le/non_signalling_xscv.c b/tests/tcg/ppc64le/non_signalling_xscv.c index 91e25cad46..836df71ef0 100644 --- a/tests/tcg/ppc64le/non_signalling_xscv.c +++ b/tests/tcg/ppc64le/non_signalling_xscv.c @@ -6,16 +6,16 @@ #define TEST(INSN, B_HI, B_LO, T_HI, T_LO) \ do { \ uint64_t th, tl, bh = B_HI, bl = B_LO; \ - asm("mtvsrd 0, %2\n\t" \ - "mtvsrd 1, %3\n\t" \ - "xxmrghd 0, 0, 1\n\t" \ - INSN " 0, 0\n\t" \ - "mfvsrd %0, 0\n\t" \ - "xxswapd 0, 0\n\t" \ - "mfvsrd %1, 0\n\t" \ + asm("mtvsrd 32, %2\n\t" \ + "mtvsrd 33, %3\n\t" \ + "xxmrghd 32, 32, 33\n\t" \ + INSN " 32, 32\n\t" \ + "mfvsrd %0, 32\n\t" \ + "xxswapd 32, 32\n\t" \ + "mfvsrd %1, 32\n\t" \ : "=r" (th), "=r" (tl) \ : "r" (bh), "r" (bl) \ - : "vs0", "vs1"); \ + : "v0", "v1"); \ printf(INSN "(0x%016" PRIx64 "%016" PRIx64 ") = 0x%016" PRIx64 \ "%016" PRIx64 "\n", bh, bl, th, tl); \ assert(th == T_HI && tl == T_LO); \ diff --git a/tests/tcg/s390x/exrl-trt.c b/tests/tcg/s390x/exrl-trt.c index 16711a3181..451f777b9d 100644 --- a/tests/tcg/s390x/exrl-trt.c +++ b/tests/tcg/s390x/exrl-trt.c @@ -5,8 +5,8 @@ int main(void) { char op1[] = "hello"; char op2[256]; - uint64_t r1 = 0xffffffffffffffffull; - uint64_t r2 = 0xffffffffffffffffull; + register uint64_t r1 asm("r1") = 0xffffffffffffffffull; + register uint64_t r2 asm("r2") = 0xffffffffffffffffull; uint64_t cc; int i; @@ -21,8 +21,6 @@ int main(void) " j 2f\n" "1: trt 0(1,%[op1]),%[op2]\n" "2: exrl %[op1_len],1b\n" - " lgr %[r1],%%r1\n" - " lgr %[r2],%%r2\n" " ipm %[cc]\n" : [r1] "+r" (r1), [r2] "+r" (r2), @@ -30,7 +28,7 @@ int main(void) : [op1] "a" (&op1), [op1_len] "a" (5), [op2] "Q" (op2) - : "r1", "r2", "cc"); + : "cc"); cc = (cc >> 28) & 3; if (cc != 2) { write(1, "bad cc\n", 7); diff --git a/tests/tcg/s390x/exrl-trtr.c b/tests/tcg/s390x/exrl-trtr.c index 5f30cda6bd..422f7f385a 100644 --- 
a/tests/tcg/s390x/exrl-trtr.c +++ b/tests/tcg/s390x/exrl-trtr.c @@ -5,8 +5,8 @@ int main(void) { char op1[] = {0, 1, 2, 3}; char op2[256]; - uint64_t r1 = 0xffffffffffffffffull; - uint64_t r2 = 0xffffffffffffffffull; + register uint64_t r1 asm("r1") = 0xffffffffffffffffull; + register uint64_t r2 asm("r2") = 0xffffffffffffffffull; uint64_t cc; int i; @@ -21,8 +21,6 @@ int main(void) " j 2f\n" "1: trtr 3(1,%[op1]),%[op2]\n" "2: exrl %[op1_len],1b\n" - " lgr %[r1],%%r1\n" - " lgr %[r2],%%r2\n" " ipm %[cc]\n" : [r1] "+r" (r1), [r2] "+r" (r2), @@ -30,7 +28,7 @@ int main(void) : [op1] "a" (&op1), [op1_len] "a" (3), [op2] "Q" (op2) - : "r1", "r2", "cc"); + : "cc"); cc = (cc >> 28) & 3; if (cc != 1) { write(1, "bad cc\n", 7); diff --git a/tests/tcg/s390x/mie3-mvcrl.c b/tests/tcg/s390x/mie3-mvcrl.c index 57b08e48d0..93c7b0a290 100644 --- a/tests/tcg/s390x/mie3-mvcrl.c +++ b/tests/tcg/s390x/mie3-mvcrl.c @@ -1,15 +1,17 @@ #include <stdint.h> #include <string.h> + static inline void mvcrl_8(const char *dst, const char *src) { asm volatile ( - "llill %%r0, 8\n" - ".insn sse, 0xE50A00000000, 0(%[dst]), 0(%[src])" - : : [dst] "d" (dst), [src] "d" (src) - : "memory"); + "llill %%r0, 8\n" + ".insn sse, 0xE50A00000000, 0(%[dst]), 0(%[src])" + : : [dst] "d" (dst), [src] "d" (src) + : "r0", "memory"); } + int main(int argc, char *argv[]) { const char *alpha = "abcdefghijklmnop"; diff --git a/tests/tcg/s390x/mie3-sel.c b/tests/tcg/s390x/mie3-sel.c index b0c5c9857d..0dfd532ed4 100644 --- a/tests/tcg/s390x/mie3-sel.c +++ b/tests/tcg/s390x/mie3-sel.c @@ -1,32 +1,27 @@ #include <stdint.h> + #define Fi3(S, ASM) uint64_t S(uint64_t a, uint64_t b, uint64_t c) \ -{ \ - uint64_t res = 0; \ - asm ( \ - "lg %%r2, %[a]\n" \ - "lg %%r3, %[b]\n" \ - "lg %%r0, %[c]\n" \ - "ltgr %%r0, %%r0\n" \ - ASM \ - "stg %%r0, %[res] " \ - : [res] "=m" (res) \ - : [a] "m" (a), \ - [b] "m" (b), \ - [c] "m" (c) \ - : "r0", "r2", \ - "r3", "r4" \ - ); \ - return res; \ +{ \ +asm volatile ( \ + "ltgr %[c], %[c]\n" \ 
+ ASM \ + : [c] "+r" (c) \ + : [a] "r" (a) \ + , [b] "r" (b) \ +); \ + return c; \ } -Fi3 (_selre, ".insn rrf, 0xB9F00000, %%r0, %%r3, %%r2, 8\n") -Fi3 (_selgrz, ".insn rrf, 0xB9E30000, %%r0, %%r3, %%r2, 8\n") -Fi3 (_selfhrnz, ".insn rrf, 0xB9C00000, %%r0, %%r3, %%r2, 7\n") +Fi3 (_selre, ".insn rrf, 0xB9F00000, %[c], %[b], %[a], 8\n") +Fi3 (_selgrz, ".insn rrf, 0xB9E30000, %[c], %[b], %[a], 8\n") +Fi3 (_selfhrnz, ".insn rrf, 0xB9C00000, %[c], %[b], %[a], 7\n") + int main(int argc, char *argv[]) { uint64_t a = ~0, b = ~0, c = ~0; + a = _selre(0x066600000066ull, 0x066600000006ull, a); b = _selgrz(0xF00D00000005ull, 0xF00D00000055ull, b); c = _selfhrnz(0x043200000044ull, 0x065400000004ull, c); diff --git a/tests/tcg/s390x/mvc.c b/tests/tcg/s390x/mvc.c index aa552d52e5..7ae4c44550 100644 --- a/tests/tcg/s390x/mvc.c +++ b/tests/tcg/s390x/mvc.c @@ -20,8 +20,8 @@ static inline void mvc_256(const char *dst, const char *src) asm volatile ( " mvc 0(256,%[dst]),0(%[src])\n" : - : [dst] "d" (dst), - [src] "d" (src) + : [dst] "a" (dst), + [src] "a" (src) : "memory"); } diff --git a/tests/tcg/s390x/mvo.c b/tests/tcg/s390x/mvo.c index 5546fe2a97..0c3ecdde2e 100644 --- a/tests/tcg/s390x/mvo.c +++ b/tests/tcg/s390x/mvo.c @@ -11,8 +11,8 @@ int main(void) asm volatile ( " mvo 0(4,%[dest]),0(3,%[src])\n" : - : [dest] "d" (dest + 1), - [src] "d" (src + 1) + : [dest] "a" (dest + 1), + [src] "a" (src + 1) : "memory"); for (i = 0; i < sizeof(expected); i++) { diff --git a/tests/tcg/s390x/pack.c b/tests/tcg/s390x/pack.c index 4be36f29a7..55e7e214e8 100644 --- a/tests/tcg/s390x/pack.c +++ b/tests/tcg/s390x/pack.c @@ -9,7 +9,7 @@ int main(void) asm volatile( " pack 2(4,%[data]),2(4,%[data])\n" : - : [data] "r" (&data[0]) + : [data] "a" (&data[0]) : "memory"); for (i = 0; i < 8; i++) { if (data[i] != exp[i]) { diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c index 2a3ef58799..f5e75a96b6 100644 --- a/tests/unit/ptimer-test-stubs.c +++ 
b/tests/unit/ptimer-test-stubs.c @@ -12,7 +12,6 @@ #include "qemu/main-loop.h" #include "sysemu/replay.h" #include "migration/vmstate.h" -#include "sysemu/cpu-timers.h" #include "ptimer-test.h" diff --git a/tests/unit/rcutorture.c b/tests/unit/rcutorture.c index de6f649058..495a4e6f42 100644 --- a/tests/unit/rcutorture.c +++ b/tests/unit/rcutorture.c @@ -122,7 +122,7 @@ static void *rcu_read_perf_test(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (goflag == GOFLAG_INIT) { g_usleep(1000); @@ -148,7 +148,7 @@ static void *rcu_update_perf_test(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (goflag == GOFLAG_INIT) { g_usleep(1000); @@ -253,7 +253,7 @@ static void *rcu_read_stress_test(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); while (goflag == GOFLAG_INIT) { g_usleep(1000); } @@ -304,7 +304,7 @@ static void *rcu_update_stress_test(void *arg) struct rcu_stress *cp = qatomic_read(&rcu_stress_current); rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); while (goflag == GOFLAG_INIT) { g_usleep(1000); @@ -347,7 +347,7 @@ static void *rcu_fake_update_stress_test(void *arg) { rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); while (goflag == GOFLAG_INIT) { g_usleep(1000); } diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c index aea660aeed..94718c9319 100644 --- a/tests/unit/test-block-iothread.c +++ b/tests/unit/test-block-iothread.c @@ -279,10 +279,10 @@ static void test_sync_op_check(BdrvChild *c) g_assert_cmpint(ret, ==, -ENOTSUP); 
} -static void test_sync_op_invalidate_cache(BdrvChild *c) +static void test_sync_op_activate(BdrvChild *c) { /* Early success: Image is not inactive */ - bdrv_invalidate_cache(c->bs, NULL); + bdrv_activate(c->bs, NULL); } @@ -325,8 +325,8 @@ const SyncOpTest sync_op_tests[] = { .name = "/sync-op/check", .fn = test_sync_op_check, }, { - .name = "/sync-op/invalidate_cache", - .fn = test_sync_op_invalidate_cache, + .name = "/sync-op/activate", + .fn = test_sync_op_activate, }, }; diff --git a/tests/unit/test-rcu-list.c b/tests/unit/test-rcu-list.c index 49641e1936..64b81ae058 100644 --- a/tests/unit/test-rcu-list.c +++ b/tests/unit/test-rcu-list.c @@ -171,7 +171,7 @@ static void *rcu_q_reader(void *arg) rcu_register_thread(); - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (qatomic_read(&goflag) == GOFLAG_INIT) { g_usleep(1000); @@ -206,7 +206,7 @@ static void *rcu_q_updater(void *arg) long long n_removed_local = 0; struct list_element *el, *prev_el; - *(struct rcu_reader_data **)arg = &rcu_reader; + *(struct rcu_reader_data **)arg = get_ptr_rcu_reader(); qatomic_inc(&nthreadsrunning); while (qatomic_read(&goflag) == GOFLAG_INIT) { g_usleep(1000); diff --git a/tests/vm/haiku.x86_64 b/tests/vm/haiku.x86_64 index 2eb736dae1..936f7d2ae2 100755 --- a/tests/vm/haiku.x86_64 +++ b/tests/vm/haiku.x86_64 @@ -2,7 +2,7 @@ # # Haiku VM image # -# Copyright 2020 Haiku, Inc. +# Copyright 2020-2022 Haiku, Inc. 
# # Authors: # Alexander von Gluck IV <kallisti5@unixzen.com> @@ -48,8 +48,8 @@ class HaikuVM(basevm.BaseVM): name = "haiku" arch = "x86_64" - link = "https://app.vagrantup.com/haiku-os/boxes/r1beta2-x86_64/versions/20200702/providers/libvirt.box" - csum = "41c38b316e0cbdbc66b5dbaf3612b866700a4f35807cb1eb266a5bf83e9e68d5" + link = "https://app.vagrantup.com/haiku-os/boxes/r1beta3-x86_64/versions/20220216/providers/libvirt.box" + csum = "e67d4aacbcc687013d5cc91990ddd86cc5d70a5d28432ae2691944f8ce5d5041" poweroff = "shutdown" @@ -99,7 +99,7 @@ class HaikuVM(basevm.BaseVM): self.print_step("Extracting disk image") - subprocess.check_call(["tar", "xzf", tarball, "./box.img", "-O"], + subprocess.check_call(["tar", "xzf", tarball, "box.img", "-O"], stdout=open(img, 'wb')) self.print_step("Preparing disk image") diff --git a/ui/clipboard.c b/ui/clipboard.c index 5f15cf853d..9079ef829b 100644 --- a/ui/clipboard.c +++ b/ui/clipboard.c @@ -66,8 +66,10 @@ void qemu_clipboard_update(QemuClipboardInfo *info) notifier_list_notify(&clipboard_notifiers, &notify); - qemu_clipboard_info_unref(cbinfo[info->selection]); - cbinfo[info->selection] = qemu_clipboard_info_ref(info); + if (cbinfo[info->selection] != info) { + qemu_clipboard_info_unref(cbinfo[info->selection]); + cbinfo[info->selection] = qemu_clipboard_info_ref(info); + } } QemuClipboardInfo *qemu_clipboard_info(QemuClipboardSelection selection) diff --git a/ui/cocoa.m b/ui/cocoa.m index b6e70e9134..c88149852b 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -83,7 +83,7 @@ static void cocoa_switch(DisplayChangeListener *dcl, static void cocoa_refresh(DisplayChangeListener *dcl); -static NSWindow *normalWindow, *about_window; +static NSWindow *normalWindow; static const DisplayChangeListenerOps dcl_ops = { .dpy_name = "cocoa", .dpy_gfx_update = cocoa_update, @@ -1140,7 +1140,6 @@ QemuCocoaView *cocoaView; - (BOOL)verifyQuit; - (void)openDocumentation:(NSString *)filename; - (IBAction) do_about_menu_item: (id) sender; --
(void)make_about_window; - (void)adjustSpeed:(id)sender; @end @@ -1186,8 +1185,6 @@ QemuCocoaView *cocoaView; [pauseLabel setFont: [NSFont fontWithName: @"Helvetica" size: 90]]; [pauseLabel setTextColor: [NSColor blackColor]]; [pauseLabel sizeToFit]; - - [self make_about_window]; } return self; } @@ -1471,92 +1468,29 @@ QemuCocoaView *cocoaView; /* The action method for the About menu item */ - (IBAction) do_about_menu_item: (id) sender { - [about_window makeKeyAndOrderFront: nil]; -} - -/* Create and display the about dialog */ -- (void)make_about_window -{ - /* Make the window */ - int x = 0, y = 0, about_width = 400, about_height = 200; - NSRect window_rect = NSMakeRect(x, y, about_width, about_height); - about_window = [[NSWindow alloc] initWithContentRect:window_rect - styleMask:NSWindowStyleMaskTitled | NSWindowStyleMaskClosable | - NSWindowStyleMaskMiniaturizable - backing:NSBackingStoreBuffered - defer:NO]; - [about_window setTitle: @"About"]; - [about_window setReleasedWhenClosed: NO]; - [about_window center]; - NSView *superView = [about_window contentView]; - - /* Create the dimensions of the picture */ - int picture_width = 80, picture_height = 80; - x = (about_width - picture_width)/2; - y = about_height - picture_height - 10; - NSRect picture_rect = NSMakeRect(x, y, picture_width, picture_height); - - /* Make the picture of QEMU */ - NSImageView *picture_view = [[NSImageView alloc] initWithFrame: - picture_rect]; - char *qemu_image_path_c = get_relocated_path(CONFIG_QEMU_ICONDIR "/hicolor/512x512/apps/qemu.png"); - NSString *qemu_image_path = [NSString stringWithUTF8String:qemu_image_path_c]; - g_free(qemu_image_path_c); - NSImage *qemu_image = [[NSImage alloc] initWithContentsOfFile:qemu_image_path]; - [picture_view setImage: qemu_image]; - [picture_view setImageScaling: NSImageScaleProportionallyUpOrDown]; - [superView addSubview: picture_view]; - - /* Make the name label */ - NSBundle *bundle = [NSBundle mainBundle]; - if (bundle) { - x = 0; - y = 
y - 25; - int name_width = about_width, name_height = 20; - NSRect name_rect = NSMakeRect(x, y, name_width, name_height); - NSTextField *name_label = [[NSTextField alloc] initWithFrame: name_rect]; - [name_label setEditable: NO]; - [name_label setBezeled: NO]; - [name_label setDrawsBackground: NO]; - [name_label setAlignment: NSTextAlignmentCenter]; - NSString *qemu_name = [[bundle executablePath] lastPathComponent]; - [name_label setStringValue: qemu_name]; - [superView addSubview: name_label]; + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + char *icon_path_c = get_relocated_path(CONFIG_QEMU_ICONDIR "/hicolor/512x512/apps/qemu.png"); + NSString *icon_path = [NSString stringWithUTF8String:icon_path_c]; + g_free(icon_path_c); + NSImage *icon = [[NSImage alloc] initWithContentsOfFile:icon_path]; + NSString *version = @"QEMU emulator version " QEMU_FULL_VERSION; + NSString *copyright = @QEMU_COPYRIGHT; + NSDictionary *options; + if (icon) { + options = @{ + NSAboutPanelOptionApplicationIcon : icon, + NSAboutPanelOptionApplicationVersion : version, + @"Copyright" : copyright, + }; + [icon release]; + } else { + options = @{ + NSAboutPanelOptionApplicationVersion : version, + @"Copyright" : copyright, + }; } - - /* Set the version label's attributes */ - x = 0; - y = 50; - int version_width = about_width, version_height = 20; - NSRect version_rect = NSMakeRect(x, y, version_width, version_height); - NSTextField *version_label = [[NSTextField alloc] initWithFrame: - version_rect]; - [version_label setEditable: NO]; - [version_label setBezeled: NO]; - [version_label setAlignment: NSTextAlignmentCenter]; - [version_label setDrawsBackground: NO]; - - /* Create the version string*/ - NSString *version_string; - version_string = [[NSString alloc] initWithFormat: - @"QEMU emulator version %s", QEMU_FULL_VERSION]; - [version_label setStringValue: version_string]; - [superView addSubview: version_label]; - - /* Make copyright label */ - x = 0; - y = 35; - int 
copyright_width = about_width, copyright_height = 20; - NSRect copyright_rect = NSMakeRect(x, y, copyright_width, copyright_height); - NSTextField *copyright_label = [[NSTextField alloc] initWithFrame: - copyright_rect]; - [copyright_label setEditable: NO]; - [copyright_label setBezeled: NO]; - [copyright_label setDrawsBackground: NO]; - [copyright_label setAlignment: NSTextAlignmentCenter]; - [copyright_label setStringValue: [NSString stringWithFormat: @"%s", - QEMU_COPYRIGHT]]; - [superView addSubview: copyright_label]; + [NSApp orderFrontStandardAboutPanelWithOptions:options]; + [pool release]; } /* Used by the Speed menu items */ @@ -1611,11 +1545,15 @@ static void create_initial_menus(void) NSMenuItem *menuItem; [NSApp setMainMenu:[[NSMenu alloc] init]]; + [NSApp setServicesMenu:[[NSMenu alloc] initWithTitle:@"Services"]]; // Application menu menu = [[NSMenu alloc] initWithTitle:@""]; [menu addItemWithTitle:@"About QEMU" action:@selector(do_about_menu_item:) keyEquivalent:@""]; // About QEMU [menu addItem:[NSMenuItem separatorItem]]; //Separator + menuItem = [menu addItemWithTitle:@"Services" action:nil keyEquivalent:@""]; + [menuItem setSubmenu:[NSApp servicesMenu]]; + [menu addItem:[NSMenuItem separatorItem]]; [menu addItemWithTitle:@"Hide QEMU" action:@selector(hide:) keyEquivalent:@"h"]; //Hide QEMU menuItem = (NSMenuItem *)[menu addItemWithTitle:@"Hide Others" action:@selector(hideOtherApplications:) keyEquivalent:@"h"]; // Hide Others [menuItem setKeyEquivalentModifierMask:(NSEventModifierFlagOption|NSEventModifierFlagCommand)]; diff --git a/ui/console-gl.c b/ui/console-gl.c index 7c9894a51d..8e3c9a3c8c 100644 --- a/ui/console-gl.c +++ b/ui/console-gl.c @@ -49,6 +49,10 @@ void surface_gl_create_texture(QemuGLShader *gls, assert(gls); assert(QEMU_IS_ALIGNED(surface_stride(surface), surface_bytes_per_pixel(surface))); + if (surface->texture) { + return; + } + switch (surface->format) { case PIXMAN_BE_b8g8r8x8: case PIXMAN_BE_b8g8r8a8: diff --git 
a/ui/console.c b/ui/console.c index 40eebb6d2c..365a2c14b8 100644 --- a/ui/console.c +++ b/ui/console.c @@ -1860,7 +1860,9 @@ void dpy_gl_scanout_disable(QemuConsole *con) con->scanout.kind = SCANOUT_NONE; } QLIST_FOREACH(dcl, &s->listeners, next) { - dcl->ops->dpy_gl_scanout_disable(dcl); + if (dcl->ops->dpy_gl_scanout_disable) { + dcl->ops->dpy_gl_scanout_disable(dcl); + } } } @@ -1881,10 +1883,12 @@ void dpy_gl_scanout_texture(QemuConsole *con, x, y, width, height }; QLIST_FOREACH(dcl, &s->listeners, next) { - dcl->ops->dpy_gl_scanout_texture(dcl, backing_id, - backing_y_0_top, - backing_width, backing_height, - x, y, width, height); + if (dcl->ops->dpy_gl_scanout_texture) { + dcl->ops->dpy_gl_scanout_texture(dcl, backing_id, + backing_y_0_top, + backing_width, backing_height, + x, y, width, height); + } } } @@ -1897,7 +1901,9 @@ void dpy_gl_scanout_dmabuf(QemuConsole *con, con->scanout.kind = SCANOUT_DMABUF; con->scanout.dmabuf = dmabuf; QLIST_FOREACH(dcl, &s->listeners, next) { - dcl->ops->dpy_gl_scanout_dmabuf(dcl, dmabuf); + if (dcl->ops->dpy_gl_scanout_dmabuf) { + dcl->ops->dpy_gl_scanout_dmabuf(dcl, dmabuf); + } } } @@ -1951,7 +1957,9 @@ void dpy_gl_update(QemuConsole *con, graphic_hw_gl_block(con, true); QLIST_FOREACH(dcl, &s->listeners, next) { - dcl->ops->dpy_gl_update(dcl, x, y, w, h); + if (dcl->ops->dpy_gl_update) { + dcl->ops->dpy_gl_update(dcl, x, y, w, h); + } } graphic_hw_gl_block(con, false); } @@ -2392,13 +2400,12 @@ static void vc_chr_open(Chardev *chr, void qemu_console_resize(QemuConsole *s, int width, int height) { - DisplaySurface *surface = qemu_console_surface(s); + DisplaySurface *surface; assert(s->console_type == GRAPHIC_CONSOLE); - if (surface && (surface->flags & QEMU_ALLOCATED_FLAG) && - pixman_image_get_width(surface->image) == width && - pixman_image_get_height(surface->image) == height) { + if (qemu_console_get_width(s, -1) == width && + qemu_console_get_height(s, -1) == height) { return; } diff --git a/util/async.c 
b/util/async.c index 08d25feef5..2ea1172f3e 100644 --- a/util/async.c +++ b/util/async.c @@ -32,6 +32,7 @@ #include "qemu/rcu_queue.h" #include "block/raw-aio.h" #include "qemu/coroutine_int.h" +#include "qemu/coroutine-tls.h" #include "trace.h" /***********************************************************/ @@ -675,12 +676,13 @@ void aio_context_release(AioContext *ctx) qemu_rec_mutex_unlock(&ctx->lock); } -static __thread AioContext *my_aiocontext; +QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext) AioContext *qemu_get_current_aio_context(void) { - if (my_aiocontext) { - return my_aiocontext; + AioContext *ctx = get_my_aiocontext(); + if (ctx) { + return ctx; } if (qemu_mutex_iothread_locked()) { /* Possibly in a vCPU thread. */ @@ -691,6 +693,6 @@ AioContext *qemu_get_current_aio_context(void) void qemu_set_current_aio_context(AioContext *ctx) { - assert(!my_aiocontext); - my_aiocontext = ctx; + assert(!get_my_aiocontext()); + set_my_aiocontext(ctx); } diff --git a/util/atomic64.c b/util/atomic64.c index 22983a970f..c20d071d8e 100644 --- a/util/atomic64.c +++ b/util/atomic64.c @@ -8,6 +8,7 @@ #include "qemu/atomic.h" #include "qemu/thread.h" #include "qemu/cacheinfo.h" +#include "qemu/memalign.h" #ifdef CONFIG_ATOMIC64 #error This file must only be compiled if !CONFIG_ATOMIC64 diff --git a/util/hbitmap.c b/util/hbitmap.c index 305b894a63..dd0501d9a7 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -301,6 +301,39 @@ bool hbitmap_next_dirty_area(const HBitmap *hb, int64_t start, int64_t end, return true; } +bool hbitmap_status(const HBitmap *hb, int64_t start, int64_t count, + int64_t *pnum) +{ + int64_t next_dirty, next_zero; + + assert(start >= 0); + assert(count > 0); + assert(start + count <= hb->orig_size); + + next_dirty = hbitmap_next_dirty(hb, start, count); + if (next_dirty == -1) { + *pnum = count; + return false; + } + + if (next_dirty > start) { + *pnum = next_dirty - start; + return false; + } + + assert(next_dirty == start); + + next_zero = 
hbitmap_next_zero(hb, start, count); + if (next_zero == -1) { + *pnum = count; + return true; + } + + assert(next_zero > start); + *pnum = next_zero - start; + return false; +} + bool hbitmap_empty(const HBitmap *hb) { return hb->count == 0; diff --git a/util/memalign.c b/util/memalign.c new file mode 100644 index 0000000000..c199ae7073 --- /dev/null +++ b/util/memalign.c @@ -0,0 +1,92 @@ +/* + * memalign.c: Allocate an aligned memory region + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010-2016 Red Hat, Inc. + * Copyright (c) 2022 Linaro Ltd + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/host-utils.h" +#include "qemu/memalign.h" +#include "trace.h" + +void *qemu_try_memalign(size_t alignment, size_t size) +{ + void *ptr; + + if (alignment < sizeof(void*)) { + alignment = sizeof(void*); + } else { + g_assert(is_power_of_2(alignment)); + } + + /* + * Handling of 0 allocations varies among the different + * platform APIs (for instance _aligned_malloc() will + * fail) -- ensure that we always return a valid non-NULL + * pointer that can be freed by qemu_vfree(). + */ + if (size == 0) { + size++; + } +#if defined(CONFIG_POSIX_MEMALIGN) + int ret; + ret = posix_memalign(&ptr, alignment, size); + if (ret != 0) { + errno = ret; + ptr = NULL; + } +#elif defined(CONFIG_ALIGNED_MALLOC) + ptr = _aligned_malloc(size, alignment); +#elif defined(CONFIG_VALLOC) + ptr = valloc(size); +#elif defined(CONFIG_MEMALIGN) + ptr = memalign(alignment, size); +#else + #error No function to allocate aligned memory available +#endif + trace_qemu_memalign(alignment, size, ptr); + return ptr; +} + +void *qemu_memalign(size_t alignment, size_t size) +{ + void *p = qemu_try_memalign(alignment, size); + if (p) { + return p; + } + fprintf(stderr, + "qemu_memalign: failed to allocate %zu bytes at alignment %zu: %s\n", + size, alignment, strerror(errno)); + abort(); +} + +void qemu_vfree(void *ptr) +{ + trace_qemu_vfree(ptr); +#if !defined(CONFIG_POSIX_MEMALIGN) && defined(CONFIG_ALIGNED_MALLOC) + /* Only Windows _aligned_malloc needs a special free function */ + _aligned_free(ptr); +#else + free(ptr); +#endif +} diff --git a/util/meson.build b/util/meson.build index 3736988b9f..f6ee74ad0c 100644 --- a/util/meson.build +++ b/util/meson.build @@ -51,6 +51,7 @@ util_ss.add(when: 'CONFIG_POSIX', if_true: files('drm.c')) util_ss.add(files('guest-random.c')) util_ss.add(files('yank.c')) util_ss.add(files('int128.c')) +util_ss.add(files('memalign.c')) if have_user util_ss.add(files('selfmap.c')) diff --git a/util/osdep.c b/util/osdep.c index 
723cdcb004..7c4deda6fe 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -33,7 +33,6 @@ extern int madvise(char *, size_t, int); #endif -#include <dirent.h> #include "qemu-common.h" #include "qemu/cutils.h" #include "qemu/sockets.h" @@ -619,23 +618,3 @@ writev(int fd, const struct iovec *iov, int iov_cnt) return readv_writev(fd, iov, iov_cnt, true); } #endif - -struct dirent * -qemu_dirent_dup(struct dirent *dent) -{ - size_t sz = 0; -#if defined _DIRENT_HAVE_D_RECLEN - /* Avoid use of strlen() if platform supports d_reclen. */ - sz = dent->d_reclen; -#endif - /* - * Test sz for zero even if d_reclen is available - * because some drivers may set d_reclen to zero. - */ - if (sz == 0) { - /* Fallback to the most portable way. */ - sz = offsetof(struct dirent, d_name) + - strlen(dent->d_name) + 1; - } - return g_memdup(dent, sz); -} diff --git a/util/oslib-posix.c b/util/oslib-posix.c index f2be7321c5..2ebfb75057 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -199,46 +199,6 @@ fail_close: return false; } -void *qemu_oom_check(void *ptr) -{ - if (ptr == NULL) { - fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno)); - abort(); - } - return ptr; -} - -void *qemu_try_memalign(size_t alignment, size_t size) -{ - void *ptr; - - if (alignment < sizeof(void*)) { - alignment = sizeof(void*); - } else { - g_assert(is_power_of_2(alignment)); - } - -#if defined(CONFIG_POSIX_MEMALIGN) - int ret; - ret = posix_memalign(&ptr, alignment, size); - if (ret != 0) { - errno = ret; - ptr = NULL; - } -#elif defined(CONFIG_BSD) - ptr = valloc(size); -#else - ptr = memalign(alignment, size); -#endif - trace_qemu_memalign(alignment, size, ptr); - return ptr; -} - -void *qemu_memalign(size_t alignment, size_t size) -{ - return qemu_oom_check(qemu_try_memalign(alignment, size)); -} - /* alloc shared memory pages */ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared, bool noreserve) @@ -260,12 +220,6 @@ void *qemu_anon_ram_alloc(size_t size, 
uint64_t *alignment, bool shared, return ptr; } -void qemu_vfree(void *ptr) -{ - trace_qemu_vfree(ptr); - free(ptr); -} - void qemu_anon_ram_free(void *ptr, size_t size) { trace_qemu_anon_ram_free(ptr, size); diff --git a/util/oslib-win32.c b/util/oslib-win32.c index af559ef339..4b1ce0be4b 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -44,35 +44,6 @@ /* this must come after including "trace.h" */ #include <shlobj.h> -void *qemu_oom_check(void *ptr) -{ - if (ptr == NULL) { - fprintf(stderr, "Failed to allocate memory: %lu\n", GetLastError()); - abort(); - } - return ptr; -} - -void *qemu_try_memalign(size_t alignment, size_t size) -{ - void *ptr; - - g_assert(size != 0); - if (alignment < sizeof(void *)) { - alignment = sizeof(void *); - } else { - g_assert(is_power_of_2(alignment)); - } - ptr = _aligned_malloc(size, alignment); - trace_qemu_memalign(alignment, size, ptr); - return ptr; -} - -void *qemu_memalign(size_t alignment, size_t size) -{ - return qemu_oom_check(qemu_try_memalign(alignment, size)); -} - static int get_allocation_granularity(void) { SYSTEM_INFO system_info; @@ -104,12 +75,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared, return ptr; } -void qemu_vfree(void *ptr) -{ - trace_qemu_vfree(ptr); - _aligned_free(ptr); -} - void qemu_anon_ram_free(void *ptr, size_t size) { trace_qemu_anon_ram_free(ptr, size); diff --git a/util/qht.c b/util/qht.c index 079605121b..065fc501f4 100644 --- a/util/qht.c +++ b/util/qht.c @@ -69,6 +69,7 @@ #include "qemu/qht.h" #include "qemu/atomic.h" #include "qemu/rcu.h" +#include "qemu/memalign.h" //#define QHT_DEBUG diff --git a/util/rcu.c b/util/rcu.c index c91da9f137..b6d6c71cff 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -65,7 +65,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr) /* Written to only by each individual reader. Read by both the reader and the * writers. 
*/ -__thread struct rcu_reader_data rcu_reader; +QEMU_DEFINE_CO_TLS(struct rcu_reader_data, rcu_reader) /* Protected by rcu_registry_lock. */ typedef QLIST_HEAD(, rcu_reader_data) ThreadList; @@ -355,23 +355,23 @@ void drain_call_rcu(void) void rcu_register_thread(void) { - assert(rcu_reader.ctr == 0); + assert(get_ptr_rcu_reader()->ctr == 0); qemu_mutex_lock(&rcu_registry_lock); - QLIST_INSERT_HEAD(&registry, &rcu_reader, node); + QLIST_INSERT_HEAD(&registry, get_ptr_rcu_reader(), node); qemu_mutex_unlock(&rcu_registry_lock); } void rcu_unregister_thread(void) { qemu_mutex_lock(&rcu_registry_lock); - QLIST_REMOVE(&rcu_reader, node); + QLIST_REMOVE(get_ptr_rcu_reader(), node); qemu_mutex_unlock(&rcu_registry_lock); } void rcu_add_force_rcu_notifier(Notifier *n) { qemu_mutex_lock(&rcu_registry_lock); - notifier_list_add(&rcu_reader.force_rcu, n); + notifier_list_add(&get_ptr_rcu_reader()->force_rcu, n); qemu_mutex_unlock(&rcu_registry_lock); } |