From 49df6397edfc5a8ba8ca813b51fb9729d8e94b40 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:21:40 -0700 Subject: KVM: x86: Split the APIC from the rest of IRQCHIP. First patch in a series which enables the relocation of the PIC/IOAPIC to userspace. Adds capability KVM_CAP_SPLIT_IRQCHIP; KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the rest of the irqchip. Compile tested for x86. Signed-off-by: Steve Rutherford Suggested-by: Andrew Honig Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1bef9e21e725..354f147647ab 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1002,6 +1002,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) #endif int kvm_setup_default_irq_routing(struct kvm *kvm); +int kvm_setup_empty_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a9256f0331ae..ed00f8fc9ea2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -824,6 +824,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_MULTI_ADDRESS_SPACE 118 #define KVM_CAP_GUEST_DEBUG_HW_BPS 119 #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 +#define KVM_CAP_SPLIT_IRQCHIP 121 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-55-g7522 From 7543a635aa09eb138b2cbf60ac3ff19503ae6954 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:21:41 -0700 Subject: KVM: x86: Add KVM exit for IOAPIC EOIs Adds KVM_EXIT_IOAPIC_EOI which allows the kernel to EOI level-triggered IOAPIC interrupts. Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs to be informed (which is identical to the EOI_EXIT_BITMAP field used by modern x86 processors, but can also be used to elide kvm IOAPIC EOI exits on older processors). [Note: A prototype using ResampleFDs found that decoupling the EOI from the VCPU's thread made it possible for the VCPU to not see a recent EOI after reentering the guest. This does not match real hardware.] Compile tested for Intel x86. Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 12 ++++++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/lapic.c | 24 +++++++++++++++++------- arch/x86/kvm/x86.c | 11 +++++++++++ include/linux/kvm_host.h | 2 +- include/uapi/linux/kvm.h | 5 +++++ 6 files changed, 48 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 43e0816d0de1..0d14bf5db534 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3309,6 +3309,18 @@ Valid values for 'type' are: to ignore the request, or to gather VM memory core dump and/or reset/shutdown of the VM. + /* KVM_EXIT_IOAPIC_EOI */ + struct { + __u8 vector; + } eoi; + +Indicates that the VCPU's in-kernel local APIC received an EOI for a +level-triggered IOAPIC interrupt. This exit only triggers when the +IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled); +the userspace IOAPIC should process the EOI and retrigger the interrupt if +it is still asserted. Vector is the LAPIC interrupt vector for which the +EOI was received. + /* Fix the size of the union. */ char padding[256]; }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index befcf555bddc..af09fa1d1be7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -574,6 +574,8 @@ struct kvm_vcpu_arch { struct { bool pv_unhalted; } pv; + + int pending_ioapic_eoi; }; struct kvm_lpage_info { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e05946c36b87..ef70f6f3a37a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -877,15 +877,25 @@ static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) { - if (kvm_ioapic_handles_vector(apic, vector)) { - int trigger_mode; - if (apic_test_vector(vector, apic->regs + APIC_TMR)) - trigger_mode = IOAPIC_LEVEL_TRIG; - else - trigger_mode = IOAPIC_EDGE_TRIG; + int trigger_mode; + + /* Eoi the ioapic only if the ioapic doesn't own the vector. */ + if (!kvm_ioapic_handles_vector(apic, vector)) + return; - kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); + /* Request a KVM exit to inform the userspace IOAPIC. */ + if (irqchip_split(apic->vcpu->kvm)) { + apic->vcpu->arch.pending_ioapic_eoi = vector; + kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu); + return; } + + if (apic_test_vector(vector, apic->regs + APIC_TMR)) + trigger_mode = IOAPIC_LEVEL_TRIG; + else + trigger_mode = IOAPIC_EDGE_TRIG; + + kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); } static int apic_set_eoi(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f720774a4797..27429daa054e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6271,6 +6271,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_pmu_handle_event(vcpu); if (kvm_check_request(KVM_REQ_PMI, vcpu)) kvm_pmu_deliver_pmi(vcpu); + if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { + BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); + if (test_bit(vcpu->arch.pending_ioapic_eoi, + (void *) vcpu->arch.eoi_exit_bitmap)) { + vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; + vcpu->run->eoi.vector = + vcpu->arch.pending_ioapic_eoi; + r = 0; + goto out; + } + } if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) vcpu_scan_ioapic(vcpu); if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 354f147647ab..3b33215ed447 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -140,6 +140,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_APIC_PAGE_RELOAD 25 #define KVM_REQ_SMI 26 #define KVM_REQ_HV_CRASH 27 +#define KVM_REQ_IOAPIC_EOI_EXIT 28 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -1146,4 +1147,3 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ #endif - diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ed00f8fc9ea2..12e3afbf0f47 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -183,6 +183,7 @@ struct kvm_s390_skeys { #define KVM_EXIT_EPR 23 #define KVM_EXIT_SYSTEM_EVENT 24 #define KVM_EXIT_S390_STSI 25 +#define KVM_EXIT_IOAPIC_EOI 26 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -333,6 +334,10 @@ struct kvm_run { __u8 sel1; __u16 sel2; } s390_stsi; + /* KVM_EXIT_IOAPIC_EOI */ + struct { + __u8 vector; + } eoi; /* Fix the size of the union. */ char padding[256]; }; -- cgit v1.2.3-55-g7522 From b053b2aef25d00773fa6762dcd4b7f5c9c42d171 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:32:35 -0700 Subject: KVM: x86: Add EOI exit bitmap inference In order to support a userspace IOAPIC interacting with an in kernel APIC, the EOI exit bitmaps need to be configurable. If the IOAPIC is in userspace (i.e. the irqchip has been split), the EOI exit bitmaps will be set whenever the GSI Routes are configured. In particular, for the low MSI routes are reservable for userspace IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the destination vector of the route will be set for the destination VCPU. The intention is for the userspace IOAPICs to use the reservable MSI routes to inject interrupts into the guest. This is a slight abuse of the notion of an MSI Route, given that MSIs classically bypass the IOAPIC. It might be worthwhile to add an additional route type to improve clarity. Compile tested for Intel x86. Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 9 ++++++--- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/ioapic.h | 2 ++ arch/x86/kvm/irq_comm.c | 42 +++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/lapic.c | 3 +-- arch/x86/kvm/x86.c | 9 ++++++++- include/linux/kvm_host.h | 16 +++++++++++++++ virt/kvm/irqchip.c | 12 ++--------- 8 files changed, 78 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0d14bf5db534..89e71648d748 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3642,7 +3642,7 @@ KVM handlers should exit to userspace with rc = -EREMOTE. 7.5 KVM_CAP_SPLIT_IRQCHIP Architectures: x86 -Parameters: None +Parameters: args[0] - number of routes reserved for userspace IOAPICs Returns: 0 on success, -1 on error Create a local apic for each processor in the kernel. This can be used @@ -3650,8 +3650,11 @@ instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the IOAPIC and PIC (and also the PIT, even though this has to be enabled separately). -This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in kernel -IOAPIC or PIC. This also enables in kernel routing of interrupt requests. +This capability also enables in kernel routing of interrupt requests; +when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type are +used in the IRQ routing table. The first args[0] MSI routes are reserved +for the IOAPIC pins. Whenever the LAPIC receives an EOI for these routes, +a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace. Fails if VCPU has already been created, or if the irqchip is already in the kernel (i.e. KVM_CREATE_IRQCHIP has already been called). diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index af09fa1d1be7..7a5f9debbcd8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -688,6 +688,7 @@ struct kvm_arch { u64 disabled_quirks; bool irqchip_split; + u8 nr_reserved_ioapic_pins; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index a8842c0dee73..084617d37c74 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -9,6 +9,7 @@ struct kvm; struct kvm_vcpu; #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS +#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ #define IOAPIC_EDGE_TRIG 0 #define IOAPIC_LEVEL_TRIG 1 @@ -121,5 +122,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); #endif diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 67f6b62a6814..177460998bb0 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm) { return kvm_set_irq_routing(kvm, empty_routing, 0, 0); } + +void kvm_arch_irq_routing_update(struct kvm *kvm) +{ + if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm)) + return; + kvm_make_scan_ioapic_request(kvm); +} + +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_kernel_irq_routing_entry *entry; + struct kvm_irq_routing_table *table; + u32 i, nr_ioapic_pins; + int idx; + + /* kvm->irq_routing must be read after clearing + * KVM_SCAN_IOAPIC. */ + smp_mb(); + idx = srcu_read_lock(&kvm->irq_srcu); + table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + nr_ioapic_pins = min_t(u32, table->nr_rt_entries, + kvm->arch.nr_reserved_ioapic_pins); + for (i = 0; i < nr_ioapic_pins; ++i) { + hlist_for_each_entry(entry, &table->map[i], link) { + u32 dest_id, dest_mode; + + if (entry->type != KVM_IRQ_ROUTING_MSI) + continue; + dest_id = (entry->msi.address_lo >> 12) & 0xff; + dest_mode = (entry->msi.address_lo >> 2) & 0x1; + if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id, + dest_mode)) { + u32 vector = entry->msi.data & 0xff; + + __set_bit(vector, + (unsigned long *) eoi_exit_bitmap); + } + } + } + srcu_read_unlock(&kvm->irq_srcu, idx); +} diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ef70f6f3a37a..2f4c0d0cbe0a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -209,8 +209,7 @@ out: if (old) kfree_rcu(old, rcu); - if (ioapic_in_kernel(kvm)) - kvm_vcpu_request_scan_ioapic(kvm); + kvm_make_scan_ioapic_request(kvm); } static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 27429daa054e..4aeed2086c5e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3558,6 +3558,9 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, break; case KVM_CAP_SPLIT_IRQCHIP: { mutex_lock(&kvm->lock); + r = -EINVAL; + if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS) + goto split_irqchip_unlock; r = -EEXIST; if (irqchip_in_kernel(kvm)) goto split_irqchip_unlock; @@ -3569,6 +3572,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, /* Pairs with irqchip_in_kernel. */ smp_wmb(); kvm->arch.irqchip_split = true; + kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; r = 0; split_irqchip_unlock: mutex_unlock(&kvm->lock); @@ -6167,7 +6171,10 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8); - kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); + if (irqchip_split(vcpu->kvm)) + kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap); + else + kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); kvm_x86_ops->load_eoi_exitmap(vcpu); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3b33215ed447..d754f2d826e9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -330,6 +330,18 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; + u32 nr_rt_entries; + /* + * Array indexed by gsi. Each entry contains list of irq chips + * the gsi is connected to. + */ + struct hlist_head map[0]; +}; +#endif + #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -456,10 +468,14 @@ void vcpu_put(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_IOAPIC void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); +void kvm_arch_irq_routing_update(struct kvm *kvm); #else static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) { } +static inline void kvm_arch_irq_routing_update(struct kvm *kvm) +{ +} #endif #ifdef CONFIG_HAVE_KVM_IRQFD diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index d7ea8e20dae4..716a1c4db528 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,16 +31,6 @@ #include #include "irq.h" -struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; - u32 nr_rt_entries; - /* - * Array indexed by gsi. Each entry contains list of irq chips - * the gsi is connected to. - */ - struct hlist_head map[0]; -}; - int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi) { @@ -231,6 +221,8 @@ int kvm_set_irq_routing(struct kvm *kvm, kvm_irq_routing_update(kvm); mutex_unlock(&kvm->irq_lock); + kvm_arch_irq_routing_update(kvm); + synchronize_srcu_expedited(&kvm->irq_srcu); new = old; -- cgit v1.2.3-55-g7522 From e9ea5069d9e569c32ab913c39467df32e056b3a7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Sep 2015 14:41:59 +0800 Subject: kvm: add capability for any-length ioeventfds Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Jason Wang Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 6 +++++- include/uapi/linux/kvm.h | 1 + virt/kvm/eventfd.c | 4 +--- virt/kvm/kvm_main.c | 1 + 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e3e9c41721a2..34cc068e81ea 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1604,7 +1604,7 @@ provided event instead of triggering an exit. struct kvm_ioeventfd { __u64 datamatch; __u64 addr; /* legal pio/mmio address */ - __u32 len; /* 1, 2, 4, or 8 bytes */ + __u32 len; /* 0, 1, 2, 4, or 8 bytes */ __s32 fd; __u32 flags; __u8 pad[36]; @@ -1627,6 +1627,10 @@ to the registered address is equal to datamatch in struct kvm_ioeventfd. For virtio-ccw devices, addr contains the subchannel id and datamatch the virtqueue index. +With KVM_CAP_IOEVENTFD_ANY_LENGTH, a zero length ioeventfd is allowed, and +the kernel will ignore the length of guest write and may get a faster vmexit. +The speedup may only apply to specific architectures, but the ioeventfd will +work anyway. 4.60 KVM_DIRTY_TLB diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 12e3afbf0f47..03f3618612aa 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -830,6 +830,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_GUEST_DEBUG_HW_BPS 119 #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 #define KVM_CAP_SPLIT_IRQCHIP 121 +#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 79db45336e3a..ac89299b8699 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -914,9 +914,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return -EINVAL; /* ioeventfd with no length can't be combined with DATAMATCH */ - if (!args->len && - args->flags & (KVM_IOEVENTFD_FLAG_PIO | - KVM_IOEVENTFD_FLAG_DATAMATCH)) + if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)) return -EINVAL; ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 23116dcb2129..afd7ae6aec65 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2718,6 +2718,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_IRQFD: case KVM_CAP_IRQFD_RESAMPLE: #endif + case KVM_CAP_IOEVENTFD_ANY_LENGTH: case KVM_CAP_CHECK_EXTENSION_VM: return 1; #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING -- cgit v1.2.3-55-g7522 From e516cebb4f2b2057a5a421fea589079502acfff6 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Wed, 16 Sep 2015 12:29:48 +0300 Subject: kvm/x86: Hyper-V HV_X64_MSR_RESET msr HV_X64_MSR_RESET msr is used by Hyper-V based Windows guest to reset guest VM by hypervisor. Necessary to support loading of winhv.sys in guest, which in turn is required to support Windows VMBus. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Paolo Bonzini CC: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/hyperv.h | 3 +++ arch/x86/kvm/hyperv.c | 10 ++++++++++ arch/x86/kvm/x86.c | 7 +++++++ include/linux/kvm_host.h | 1 + 4 files changed, 21 insertions(+) (limited to 'include') diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index f0412c50c47b..dab584bf7ddf 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -153,6 +153,9 @@ /* MSR used to provide vcpu index */ #define HV_X64_MSR_VP_INDEX 0x40000002 +/* MSR used to reset the guest OS. */ +#define HV_X64_MSR_RESET 0x40000003 + /* MSR used to read the per-partition time reference counter */ #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index a8160d2ae362..0ad11a232474 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -41,6 +41,7 @@ static bool kvm_hv_msr_partition_wide(u32 msr) case HV_X64_MSR_TIME_REF_COUNT: case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + case HV_X64_MSR_RESET: r = true; break; } @@ -163,6 +164,12 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, data); case HV_X64_MSR_CRASH_CTL: return kvm_hv_msr_set_crash_ctl(vcpu, data, host); + case HV_X64_MSR_RESET: + if (data == 1) { + vcpu_debug(vcpu, "hyper-v reset requested\n"); + kvm_make_request(KVM_REQ_HV_RESET, vcpu); + } + break; default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -241,6 +248,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) pdata); case HV_X64_MSR_CRASH_CTL: return kvm_hv_msr_get_crash_ctl(vcpu, pdata); + case HV_X64_MSR_RESET: + data = 0; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b90ad01c617d..297b36fa3b80 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -952,6 +952,7 @@ static u32 emulated_msrs[] = { HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, + HV_X64_MSR_RESET, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, @@ -6321,6 +6322,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET; + r = 0; + goto out; + } } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d754f2d826e9..cd0ba2e931e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -141,6 +141,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_SMI 26 #define KVM_REQ_HV_CRASH 27 #define KVM_REQ_IOAPIC_EOI_EXIT 28 +#define KVM_REQ_HV_RESET 29 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3-55-g7522 From f73f8173126ba68eb1c42bd9a234a51d78576ca6 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 18 Sep 2015 22:29:39 +0800 Subject: virt: IRQ bypass manager When a physical I/O device is assigned to a virtual machine through facilities like VFIO and KVM, the interrupt for the device generally bounces through the host system before being injected into the VM. However, hardware technologies exist that often allow the host to be bypassed for some of these scenarios. Intel Posted Interrupts allow the specified physical edge interrupts to be directly injected into a guest when delivered to a physical processor while the vCPU is running. ARM IRQ Forwarding allows forwarded physical interrupts to be directly deactivated by the guest. The IRQ bypass manager here is meant to provide the shim to connect interrupt producers, generally the host physical device driver, with interrupt consumers, generally the hypervisor, in order to configure these bypass mechanism. To do this, we base the connection on a shared, opaque token. For KVM-VFIO this is expected to be an eventfd_ctx since this is the connection we already use to connect an eventfd to an irqfd on the in-kernel path. When a producer and consumer with matching tokens is found, callbacks via both registered participants allow the bypass facilities to be automatically enabled. Signed-off-by: Alex Williamson Reviewed-by: Eric Auger Tested-by: Eric Auger Tested-by: Feng Wu Signed-off-by: Feng Wu Signed-off-by: Paolo Bonzini --- MAINTAINERS | 7 ++ include/linux/irqbypass.h | 90 ++++++++++++++++ virt/lib/Kconfig | 2 + virt/lib/Makefile | 1 + virt/lib/irqbypass.c | 257 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 357 insertions(+) create mode 100644 include/linux/irqbypass.h create mode 100644 virt/lib/Kconfig create mode 100644 virt/lib/Makefile create mode 100644 virt/lib/irqbypass.c (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 797236befd27..3b738cb8bdeb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11177,6 +11177,13 @@ L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/via/via-velocity.* +VIRT LIB +M: Alex Williamson +M: Paolo Bonzini +L: kvm@vger.kernel.org +S: Supported +F: virt/lib/ + VIVID VIRTUAL VIDEO DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h new file mode 100644 index 000000000000..1551b5b2f4c2 --- /dev/null +++ b/include/linux/irqbypass.h @@ -0,0 +1,90 @@ +/* + * IRQ offload/bypass manager + * + * Copyright (C) 2015 Red Hat, Inc. + * Copyright (c) 2015 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef IRQBYPASS_H +#define IRQBYPASS_H + +#include + +struct irq_bypass_consumer; + +/* + * Theory of operation + * + * The IRQ bypass manager is a simple set of lists and callbacks that allows + * IRQ producers (ex. physical interrupt sources) to be matched to IRQ + * consumers (ex. virtualization hardware that allows IRQ bypass or offload) + * via a shared token (ex. eventfd_ctx). Producers and consumers register + * independently. When a token match is found, the optional @stop callback + * will be called for each participant. The pair will then be connected via + * the @add_* callbacks, and finally the optional @start callback will allow + * any final coordination. When either participant is unregistered, the + * process is repeated using the @del_* callbacks in place of the @add_* + * callbacks. Match tokens must be unique per producer/consumer, 1:N pairings + * are not supported. + */ + +/** + * struct irq_bypass_producer - IRQ bypass producer definition + * @node: IRQ bypass manager private list management + * @token: opaque token to match between producer and consumer + * @irq: Linux IRQ number for the producer device + * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional) + * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional) + * @stop: Perform any quiesce operations necessary prior to add/del (optional) + * @start: Perform any startup operations necessary after add/del (optional) + * + * The IRQ bypass producer structure represents an interrupt source for + * participation in possible host bypass, for instance an interrupt vector + * for a physical device assigned to a VM. + */ +struct irq_bypass_producer { + struct list_head node; + void *token; + int irq; + int (*add_consumer)(struct irq_bypass_producer *, + struct irq_bypass_consumer *); + void (*del_consumer)(struct irq_bypass_producer *, + struct irq_bypass_consumer *); + void (*stop)(struct irq_bypass_producer *); + void (*start)(struct irq_bypass_producer *); +}; + +/** + * struct irq_bypass_consumer - IRQ bypass consumer definition + * @node: IRQ bypass manager private list management + * @token: opaque token to match between producer and consumer + * @add_producer: Connect the IRQ consumer to an IRQ producer + * @del_producer: Disconnect the IRQ consumer from an IRQ producer + * @stop: Perform any quiesce operations necessary prior to add/del (optional) + * @start: Perform any startup operations necessary after add/del (optional) + * + * The IRQ bypass consumer structure represents an interrupt sink for + * participation in possible host bypass, for instance a hypervisor may + * support offloads to allow bypassing the host entirely or offload + * portions of the interrupt handling to the VM. + */ +struct irq_bypass_consumer { + struct list_head node; + void *token; + int (*add_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); + void (*del_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); + void (*stop)(struct irq_bypass_consumer *); + void (*start)(struct irq_bypass_consumer *); +}; + +int irq_bypass_register_producer(struct irq_bypass_producer *); +void irq_bypass_unregister_producer(struct irq_bypass_producer *); +int irq_bypass_register_consumer(struct irq_bypass_consumer *); +void irq_bypass_unregister_consumer(struct irq_bypass_consumer *); + +#endif /* IRQBYPASS_H */ diff --git a/virt/lib/Kconfig b/virt/lib/Kconfig new file mode 100644 index 000000000000..89a414f815d2 --- /dev/null +++ b/virt/lib/Kconfig @@ -0,0 +1,2 @@ +config IRQ_BYPASS_MANAGER + tristate diff --git a/virt/lib/Makefile b/virt/lib/Makefile new file mode 100644 index 000000000000..901228d1ffbc --- /dev/null +++ b/virt/lib/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_IRQ_BYPASS_MANAGER) += irqbypass.o diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c new file mode 100644 index 000000000000..09a03b5a21ff --- /dev/null +++ b/virt/lib/irqbypass.c @@ -0,0 +1,257 @@ +/* + * IRQ offload/bypass manager + * + * Copyright (C) 2015 Red Hat, Inc. + * Copyright (c) 2015 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Various virtualization hardware acceleration techniques allow bypassing or + * offloading interrupts received from devices around the host kernel. Posted + * Interrupts on Intel VT-d systems can allow interrupts to be received + * directly by a virtual machine. ARM IRQ Forwarding allows forwarded physical + * interrupts to be directly deactivated by the guest. This manager allows + * interrupt producers and consumers to find each other to enable this sort of + * bypass. + */ + +#include +#include +#include +#include + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("IRQ bypass manager utility module"); + +static LIST_HEAD(producers); +static LIST_HEAD(consumers); +static DEFINE_MUTEX(lock); + +/* @lock must be held when calling connect */ +static int __connect(struct irq_bypass_producer *prod, + struct irq_bypass_consumer *cons) +{ + int ret = 0; + + if (prod->stop) + prod->stop(prod); + if (cons->stop) + cons->stop(cons); + + if (prod->add_consumer) + ret = prod->add_consumer(prod, cons); + + if (!ret) { + ret = cons->add_producer(cons, prod); + if (ret && prod->del_consumer) + prod->del_consumer(prod, cons); + } + + if (cons->start) + cons->start(cons); + if (prod->start) + prod->start(prod); + + return ret; +} + +/* @lock must be held when calling disconnect */ +static void __disconnect(struct irq_bypass_producer *prod, + struct irq_bypass_consumer *cons) +{ + if (prod->stop) + prod->stop(prod); + if (cons->stop) + cons->stop(cons); + + cons->del_producer(cons, prod); + + if (prod->del_consumer) + prod->del_consumer(prod, cons); + + if (cons->start) + cons->start(cons); + if (prod->start) + prod->start(prod); +} + +/** + * irq_bypass_register_producer - register IRQ bypass producer + * @producer: pointer to producer structure + * + * Add the provided IRQ producer to the list of producers and connect + * with any matching token found on the IRQ consumers list. + */ +int irq_bypass_register_producer(struct irq_bypass_producer *producer) +{ + struct irq_bypass_producer *tmp; + struct irq_bypass_consumer *consumer; + + might_sleep(); + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mutex_lock(&lock); + + list_for_each_entry(tmp, &producers, node) { + if (tmp->token == producer->token) { + mutex_unlock(&lock); + module_put(THIS_MODULE); + return -EBUSY; + } + } + + list_for_each_entry(consumer, &consumers, node) { + if (consumer->token == producer->token) { + int ret = __connect(producer, consumer); + if (ret) { + mutex_unlock(&lock); + module_put(THIS_MODULE); + return ret; + } + break; + } + } + + list_add(&producer->node, &producers); + + mutex_unlock(&lock); + + return 0; +} +EXPORT_SYMBOL_GPL(irq_bypass_register_producer); + +/** + * irq_bypass_unregister_producer - unregister IRQ bypass producer + * @producer: pointer to producer structure + * + * Remove a previously registered IRQ producer from the list of producers + * and disconnect it from any connected IRQ consumer. + */ +void irq_bypass_unregister_producer(struct irq_bypass_producer *producer) +{ + struct irq_bypass_producer *tmp; + struct irq_bypass_consumer *consumer; + + might_sleep(); + + if (!try_module_get(THIS_MODULE)) + return; /* nothing in the list anyway */ + + mutex_lock(&lock); + + list_for_each_entry(tmp, &producers, node) { + if (tmp->token != producer->token) + continue; + + list_for_each_entry(consumer, &consumers, node) { + if (consumer->token == producer->token) { + __disconnect(producer, consumer); + break; + } + } + + list_del(&producer->node); + module_put(THIS_MODULE); + break; + } + + mutex_unlock(&lock); + + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(irq_bypass_unregister_producer); + +/** + * irq_bypass_register_consumer - register IRQ bypass consumer + * @consumer: pointer to consumer structure + * + * Add the provided IRQ consumer to the list of consumers and connect + * with any matching token found on the IRQ producer list. + */ +int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) +{ + struct irq_bypass_consumer *tmp; + struct irq_bypass_producer *producer; + + if (!consumer->add_producer || !consumer->del_producer) + return -EINVAL; + + might_sleep(); + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mutex_lock(&lock); + + list_for_each_entry(tmp, &consumers, node) { + if (tmp->token == consumer->token) { + mutex_unlock(&lock); + module_put(THIS_MODULE); + return -EBUSY; + } + } + + list_for_each_entry(producer, &producers, node) { + if (producer->token == consumer->token) { + int ret = __connect(producer, consumer); + if (ret) { + mutex_unlock(&lock); + module_put(THIS_MODULE); + return ret; + } + break; + } + } + + list_add(&consumer->node, &consumers); + + mutex_unlock(&lock); + + return 0; +} +EXPORT_SYMBOL_GPL(irq_bypass_register_consumer); + +/** + * irq_bypass_unregister_consumer - unregister IRQ bypass consumer + * @consumer: pointer to consumer structure + * + * Remove a previously registered IRQ consumer from the list of consumers + * and disconnect it from any connected IRQ producer. + */ +void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer) +{ + struct irq_bypass_consumer *tmp; + struct irq_bypass_producer *producer; + + might_sleep(); + + if (!try_module_get(THIS_MODULE)) + return; /* nothing in the list anyway */ + + mutex_lock(&lock); + + list_for_each_entry(tmp, &consumers, node) { + if (tmp->token != consumer->token) + continue; + + list_for_each_entry(producer, &producers, node) { + if (producer->token == consumer->token) { + __disconnect(producer, consumer); + break; + } + } + + list_del(&consumer->node); + module_put(THIS_MODULE); + break; + } + + mutex_unlock(&lock); + + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(irq_bypass_unregister_consumer); -- cgit v1.2.3-55-g7522 From 166c9775f1f8b8f00ad1db0fa5c8fc74059d965d Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:42 +0800 Subject: KVM: create kvm_irqfd.h Move _irqfd_resampler and _irqfd struct declarations in a new public header: kvm_irqfd.h. They are respectively renamed into kvm_kernel_irqfd_resampler and kvm_kernel_irqfd. Those datatypes will be used by architecture specific code, in the context of IRQ bypass manager integration. Signed-off-by: Eric Auger Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_irqfd.h | 69 ++++++++++++++++++++++++++++++++++ virt/kvm/eventfd.c | 95 ++++++++++++----------------------------------- 2 files changed, 92 insertions(+), 72 deletions(-) create mode 100644 include/linux/kvm_irqfd.h (limited to 'include') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h new file mode 100644 index 000000000000..f926b39a26b6 --- /dev/null +++ b/include/linux/kvm_irqfd.h @@ -0,0 +1,69 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * irqfd: Allows an fd to be used to inject an interrupt to the guest + * Credit goes to Avi Kivity for the original idea. + */ + +#ifndef __LINUX_KVM_IRQFD_H +#define __LINUX_KVM_IRQFD_H + +#include +#include + +/* + * Resampling irqfds are a special variety of irqfds used to emulate + * level triggered interrupts. The interrupt is asserted on eventfd + * trigger. On acknowledgment through the irq ack notifier, the + * interrupt is de-asserted and userspace is notified through the + * resamplefd. All resamplers on the same gsi are de-asserted + * together, so we don't need to track the state of each individual + * user. We can also therefore share the same irq source ID. + */ +struct kvm_kernel_irqfd_resampler { + struct kvm *kvm; + /* + * List of resampling struct _irqfd objects sharing this gsi. + * RCU list modified under kvm->irqfds.resampler_lock + */ + struct list_head list; + struct kvm_irq_ack_notifier notifier; + /* + * Entry in list of kvm->irqfd.resampler_list. Use for sharing + * resamplers among irqfds on the same gsi. + * Accessed and modified under kvm->irqfds.resampler_lock + */ + struct list_head link; +}; + +struct kvm_kernel_irqfd { + /* Used for MSI fast-path */ + struct kvm *kvm; + wait_queue_t wait; + /* Update side is protected by irqfds.lock */ + struct kvm_kernel_irq_routing_entry irq_entry; + seqcount_t irq_entry_sc; + /* Used for level IRQ fast-path */ + int gsi; + struct work_struct inject; + /* The resampler used by this irqfd (resampler-only) */ + struct kvm_kernel_irqfd_resampler *resampler; + /* Eventfd notified on resample (resampler-only) */ + struct eventfd_ctx *resamplefd; + /* Entry in list of irqfds for a resampler (resampler-only) */ + struct list_head resampler_link; + /* Used for setup/shutdown */ + struct eventfd_ctx *eventfd; + struct list_head list; + poll_table pt; + struct work_struct shutdown; +}; + +#endif /* __LINUX_KVM_IRQFD_H */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index ac89299b8699..413f5a6b61ba 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -39,68 +40,14 @@ #include #ifdef CONFIG_HAVE_KVM_IRQFD -/* - * -------------------------------------------------------------------- - * irqfd: Allows an fd to be used to inject an interrupt to the guest - * - * Credit goes to Avi Kivity for the original idea. - * -------------------------------------------------------------------- - */ - -/* - * Resampling irqfds are a special variety of irqfds used to emulate - * level triggered interrupts. The interrupt is asserted on eventfd - * trigger. On acknowledgement through the irq ack notifier, the - * interrupt is de-asserted and userspace is notified through the - * resamplefd. All resamplers on the same gsi are de-asserted - * together, so we don't need to track the state of each individual - * user. We can also therefore share the same irq source ID. - */ -struct _irqfd_resampler { - struct kvm *kvm; - /* - * List of resampling struct _irqfd objects sharing this gsi. - * RCU list modified under kvm->irqfds.resampler_lock - */ - struct list_head list; - struct kvm_irq_ack_notifier notifier; - /* - * Entry in list of kvm->irqfd.resampler_list. Use for sharing - * resamplers among irqfds on the same gsi. - * Accessed and modified under kvm->irqfds.resampler_lock - */ - struct list_head link; -}; - -struct _irqfd { - /* Used for MSI fast-path */ - struct kvm *kvm; - wait_queue_t wait; - /* Update side is protected by irqfds.lock */ - struct kvm_kernel_irq_routing_entry irq_entry; - seqcount_t irq_entry_sc; - /* Used for level IRQ fast-path */ - int gsi; - struct work_struct inject; - /* The resampler used by this irqfd (resampler-only) */ - struct _irqfd_resampler *resampler; - /* Eventfd notified on resample (resampler-only) */ - struct eventfd_ctx *resamplefd; - /* Entry in list of irqfds for a resampler (resampler-only) */ - struct list_head resampler_link; - /* Used for setup/shutdown */ - struct eventfd_ctx *eventfd; - struct list_head list; - poll_table pt; - struct work_struct shutdown; -}; static struct workqueue_struct *irqfd_cleanup_wq; static void irqfd_inject(struct work_struct *work) { - struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); + struct kvm_kernel_irqfd *irqfd = + container_of(work, struct kvm_kernel_irqfd, inject); struct kvm *kvm = irqfd->kvm; if (!irqfd->resampler) { @@ -121,12 +68,13 @@ irqfd_inject(struct work_struct *work) static void irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) { - struct _irqfd_resampler *resampler; + struct kvm_kernel_irqfd_resampler *resampler; struct kvm *kvm; - struct _irqfd *irqfd; + struct kvm_kernel_irqfd *irqfd; int idx; - resampler = container_of(kian, struct _irqfd_resampler, notifier); + resampler = container_of(kian, + struct kvm_kernel_irqfd_resampler, notifier); kvm = resampler->kvm; kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, @@ -141,9 +89,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) } static void -irqfd_resampler_shutdown(struct _irqfd *irqfd) +irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) { - struct _irqfd_resampler *resampler = irqfd->resampler; + struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler; struct kvm *kvm = resampler->kvm; mutex_lock(&kvm->irqfds.resampler_lock); @@ -168,7 +116,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) static void irqfd_shutdown(struct work_struct *work) { - struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); + struct kvm_kernel_irqfd *irqfd = + container_of(work, struct kvm_kernel_irqfd, shutdown); u64 cnt; /* @@ -198,7 +147,7 @@ irqfd_shutdown(struct work_struct *work) /* assumes kvm->irqfds.lock is held */ static bool -irqfd_is_active(struct _irqfd *irqfd) +irqfd_is_active(struct kvm_kernel_irqfd *irqfd) { return list_empty(&irqfd->list) ? false : true; } @@ -209,7 +158,7 @@ irqfd_is_active(struct _irqfd *irqfd) * assumes kvm->irqfds.lock is held */ static void -irqfd_deactivate(struct _irqfd *irqfd) +irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) { BUG_ON(!irqfd_is_active(irqfd)); @@ -224,7 +173,8 @@ irqfd_deactivate(struct _irqfd *irqfd) static int irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { - struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); + struct kvm_kernel_irqfd *irqfd = + container_of(wait, struct kvm_kernel_irqfd, wait); unsigned long flags = (unsigned long)key; struct kvm_kernel_irq_routing_entry irq; struct kvm *kvm = irqfd->kvm; @@ -274,12 +224,13 @@ static void irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { - struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); + struct kvm_kernel_irqfd *irqfd = + container_of(pt, struct kvm_kernel_irqfd, pt); add_wait_queue(wqh, &irqfd->wait); } /* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) +static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; @@ -304,7 +255,7 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; struct fd f; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; @@ -340,7 +291,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) irqfd->eventfd = eventfd; if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { - struct _irqfd_resampler *resampler; + struct kvm_kernel_irqfd_resampler *resampler; resamplefd = eventfd_ctx_fdget(args->resamplefd); if (IS_ERR(resamplefd)) { @@ -525,7 +476,7 @@ kvm_eventfd_init(struct kvm *kvm) static int kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; struct eventfd_ctx *eventfd; eventfd = eventfd_ctx_fdget(args->fd); @@ -581,7 +532,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) void kvm_irqfd_release(struct kvm *kvm) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; spin_lock_irq(&kvm->irqfds.lock); @@ -604,7 +555,7 @@ kvm_irqfd_release(struct kvm *kvm) */ void kvm_irq_routing_update(struct kvm *kvm) { - struct _irqfd *irqfd; + struct kvm_kernel_irqfd *irqfd; spin_lock_irq(&kvm->irqfds.lock); -- cgit v1.2.3-55-g7522 From 1a02b27035f82091d51ecafcb9ccaac1f31d4eb2 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:43 +0800 Subject: KVM: introduce kvm_arch functions for IRQ bypass This patch introduces - kvm_arch_irq_bypass_add_producer - kvm_arch_irq_bypass_del_producer - kvm_arch_irq_bypass_stop - kvm_arch_irq_bypass_start They make possible to specialize the KVM IRQ bypass consumer in case CONFIG_KVM_HAVE_IRQ_BYPASS is set. Signed-off-by: Eric Auger [Add weak implementations of the callbacks. - Feng] Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 10 ++++++++++ virt/kvm/Kconfig | 3 +++ virt/kvm/eventfd.c | 12 ++++++++++++ 3 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cd0ba2e931e1..b8543b02b7bc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -1163,4 +1164,13 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ + +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, + struct irq_bypass_producer *); +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, + struct irq_bypass_producer *); +void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); +void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); +#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #endif diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index e2c876d5a03b..9f8014dda2cf 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -47,3 +47,6 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT config KVM_COMPAT def_bool y depends on COMPAT && !S390 + +config HAVE_KVM_IRQ_BYPASS + bool diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 413f5a6b61ba..c4f7abec4261 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -252,6 +252,18 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) write_seqcount_end(&irqfd->irq_entry_sc); } +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +void __attribute__((weak)) kvm_arch_irq_bypass_stop( + struct irq_bypass_consumer *cons) +{ +} + +void __attribute__((weak)) kvm_arch_irq_bypass_start( + struct irq_bypass_consumer *cons) +{ +} +#endif + static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { -- cgit v1.2.3-55-g7522 From 9016cfb577a15abd6a7990890ccf6bf1edf04d31 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:44 +0800 Subject: KVM: eventfd: add irq bypass consumer management This patch adds the registration/unregistration of an irq_bypass_consumer on irqfd assignment/deassignment. Signed-off-by: Eric Auger Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_irqfd.h | 2 ++ virt/kvm/eventfd.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index f926b39a26b6..0c1de05098c8 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -64,6 +64,8 @@ struct kvm_kernel_irqfd { struct list_head list; poll_table pt; struct work_struct shutdown; + struct irq_bypass_consumer consumer; + struct irq_bypass_producer *producer; }; #endif /* __LINUX_KVM_IRQFD_H */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index c4f7abec4261..7df356d8f1fd 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -140,6 +141,9 @@ irqfd_shutdown(struct work_struct *work) /* * It is now safe to release the object's resources */ +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + irq_bypass_unregister_consumer(&irqfd->consumer); +#endif eventfd_ctx_put(irqfd->eventfd); kfree(irqfd); } @@ -391,6 +395,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) * we might race against the POLLHUP */ fdput(f); +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + irqfd->consumer.token = (void *)irqfd->eventfd; + irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; + irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer; + irqfd->consumer.stop = kvm_arch_irq_bypass_stop; + irqfd->consumer.start = kvm_arch_irq_bypass_start; + ret = irq_bypass_register_consumer(&irqfd->consumer); + if (ret) + pr_info("irq bypass consumer (token %p) registration fails: %d\n", + irqfd->consumer.token, ret); +#endif return 0; -- cgit v1.2.3-55-g7522 From f70c20aaf141adb715a2d750c55154073b02a9c3 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Fri, 18 Sep 2015 22:29:53 +0800 Subject: KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd' This patch adds an arch specific hooks 'arch_update' in 'struct kvm_kernel_irqfd'. On Intel side, it is used to update the IRTE when VT-d posted-interrupts is used. Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ virt/kvm/eventfd.c | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b8543b02b7bc..5c3f4538807f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1172,5 +1172,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); +int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 7df356d8f1fd..b637965746bb 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -266,6 +266,13 @@ void __attribute__((weak)) kvm_arch_irq_bypass_start( struct irq_bypass_consumer *cons) { } + +int __attribute__((weak)) kvm_arch_update_irqfd_routing( + struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) +{ + return 0; +} #endif static int @@ -586,9 +593,19 @@ void kvm_irq_routing_update(struct kvm *kvm) spin_lock_irq(&kvm->irqfds.lock); - list_for_each_entry(irqfd, &kvm->irqfds.items, list) + list_for_each_entry(irqfd, &kvm->irqfds.items, list) { irqfd_update(kvm, irqfd); +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + if (irqfd->producer) { + int ret = kvm_arch_update_irqfd_routing( + irqfd->kvm, irqfd->producer->irq, + irqfd->gsi, 1); + WARN_ON(ret); + } +#endif + } + spin_unlock_irq(&kvm->irqfds.lock); } -- cgit v1.2.3-55-g7522 From bf9f6ac8d74969690df1485b33b7c238ca9f2269 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Fri, 18 Sep 2015 22:29:55 +0800 Subject: KVM: Update Posted-Interrupts Descriptor when vCPU is blocked This patch updates the Posted-Interrupts Descriptor when vCPU is blocked. pre-block: - Add the vCPU to the blocked per-CPU list - Set 'NV' to POSTED_INTR_WAKEUP_VECTOR post-block: - Remove the vCPU from the per-CPU list Signed-off-by: Feng Wu [Concentrate invocation of pre/post-block hooks to vcpu_block. - Paolo] Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/locking.txt | 12 +++ arch/x86/include/asm/kvm_host.h | 11 +++ arch/x86/kvm/vmx.c | 153 ++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 34 +++++--- include/linux/kvm_host.h | 3 + virt/kvm/kvm_main.c | 3 + 6 files changed, 206 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index d68af4dc3006..19f94a6b9bb0 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -166,3 +166,15 @@ Comment: The srcu read lock must be held while accessing memslots (e.g. MMIO/PIO address->device structure mapping (kvm->buses). The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu if it is needed by multiple functions. + +Name: blocked_vcpu_on_cpu_lock +Type: spinlock_t +Arch: x86 +Protects: blocked_vcpu_on_cpu +Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts. + When VT-d posted-interrupts is supported and the VM has assigned + devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu + protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues + wakeup notification event since external interrupts from the + assigned devices happens, we will find the vCPU on the list to + wakeup. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 15664994b6f3..cdbdb559ecd2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -899,6 +899,17 @@ struct kvm_x86_ops { /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; + /* + * Architecture specific hooks for vCPU blocking due to + * HLT instruction. + * Returns for .pre_block(): + * - 0 means continue to block the vCPU. + * - 1 means we cannot block the vCPU since some event + * happens during this period, such as, 'ON' bit in + * posted-interrupts descriptor is set. + */ + int (*pre_block)(struct kvm_vcpu *vcpu); + void (*post_block)(struct kvm_vcpu *vcpu); int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); }; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 99f5c61954ea..c5c22831aee2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -878,6 +878,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); static DEFINE_PER_CPU(struct desc_ptr, host_gdt); +/* + * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we + * can find which vCPU should be waken up. + */ +static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); +static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); + static unsigned long *vmx_io_bitmap_a; static unsigned long *vmx_io_bitmap_b; static unsigned long *vmx_msr_bitmap_legacy; @@ -2986,6 +2993,8 @@ static int hardware_enable(void) return -EBUSY; INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); + INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); + spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); /* * Now we can enable the vmclear operation in kdump @@ -6045,6 +6054,25 @@ static void update_ple_window_actual_max(void) ple_window_grow, INT_MIN); } +/* + * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. + */ +static void wakeup_handler(void) +{ + struct kvm_vcpu *vcpu; + int cpu = smp_processor_id(); + + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), + blocked_vcpu_list) { + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (pi_test_on(pi_desc) == 1) + kvm_vcpu_kick(vcpu); + } + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); +} + static __init int hardware_setup(void) { int r = -ENOMEM, i, msr; @@ -6231,6 +6259,8 @@ static __init int hardware_setup(void) kvm_x86_ops->enable_log_dirty_pt_masked = NULL; } + kvm_set_posted_intr_wakeup_handler(wakeup_handler); + return alloc_kvm_area(); out8: @@ -10431,6 +10461,126 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); } +/* + * This routine does the following things for vCPU which is going + * to be blocked if VT-d PI is enabled. + * - Store the vCPU to the wakeup list, so when interrupts happen + * we can find the right vCPU to wake up. + * - Change the Posted-interrupt descriptor as below: + * 'NDST' <-- vcpu->pre_pcpu + * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR + * - If 'ON' is set during this process, which means at least one + * interrupt is posted for this vCPU, we cannot block it, in + * this case, return 1, otherwise, return 0. + * + */ +static int vmx_pre_block(struct kvm_vcpu *vcpu) +{ + unsigned long flags; + unsigned int dest; + struct pi_desc old, new; + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP)) + return 0; + + vcpu->pre_pcpu = vcpu->cpu; + spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_add_tail(&vcpu->blocked_vcpu_list, + &per_cpu(blocked_vcpu_on_cpu, + vcpu->pre_pcpu)); + spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + + do { + old.control = new.control = pi_desc->control; + + /* + * We should not block the vCPU if + * an interrupt is posted for it. + */ + if (pi_test_on(pi_desc) == 1) { + spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock_irqrestore( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + vcpu->pre_pcpu = -1; + + return 1; + } + + WARN((pi_desc->sn == 1), + "Warning: SN field of posted-interrupts " + "is set before blocking\n"); + + /* + * Since vCPU can be preempted during this process, + * vcpu->cpu could be different with pre_pcpu, we + * need to set pre_pcpu as the destination of wakeup + * notification event, then we can find the right vCPU + * to wakeup in wakeup handler if interrupts happen + * when the vCPU is in blocked state. + */ + dest = cpu_physical_id(vcpu->pre_pcpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* set 'NV' to 'wakeup vector' */ + new.nv = POSTED_INTR_WAKEUP_VECTOR; + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); + + return 0; +} + +static void vmx_post_block(struct kvm_vcpu *vcpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct pi_desc old, new; + unsigned int dest; + unsigned long flags; + + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP)) + return; + + do { + old.control = new.control = pi_desc->control; + + dest = cpu_physical_id(vcpu->cpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* Allow posting non-urgent interrupts */ + new.sn = 0; + + /* set 'NV' to 'notification vector' */ + new.nv = POSTED_INTR_VECTOR; + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); + + if(vcpu->pre_pcpu != -1) { + spin_lock_irqsave( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock_irqrestore( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + vcpu->pre_pcpu = -1; + } +} + /* * vmx_update_pi_irte - set IRTE for Posted-Interrupts * @@ -10622,6 +10772,9 @@ static struct kvm_x86_ops vmx_x86_ops = { .flush_log_dirty = vmx_flush_log_dirty, .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, + .pre_block = vmx_pre_block, + .post_block = vmx_post_block, + .pmu_ops = &intel_pmu_ops, .update_pi_irte = vmx_update_pi_irte, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b8425a769c0a..2d2c9bb0d6d6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6335,6 +6335,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } } + /* + * KVM_REQ_EVENT is not set when posted interrupts are set by + * VT-d hardware, so we have to update RVI unconditionally. + */ + if (kvm_lapic_enabled(vcpu)) { + /* + * Update architecture specific hints for APIC + * virtual interrupt delivery. + */ + if (kvm_x86_ops->hwapic_irr_update) + kvm_x86_ops->hwapic_irr_update(vcpu, + kvm_lapic_find_highest_irr(vcpu)); + } + if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { @@ -6351,13 +6365,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->enable_irq_window(vcpu); if (kvm_lapic_enabled(vcpu)) { - /* - * Update architecture specific hints for APIC - * virtual interrupt delivery. - */ - if (kvm_x86_ops->hwapic_irr_update) - kvm_x86_ops->hwapic_irr_update(vcpu, - kvm_lapic_find_highest_irr(vcpu)); update_cr8_intercept(vcpu); kvm_lapic_sync_to_vapic(vcpu); } @@ -6493,10 +6500,15 @@ out: static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) { - if (!kvm_arch_vcpu_runnable(vcpu)) { + if (!kvm_arch_vcpu_runnable(vcpu) && + (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + + if (kvm_x86_ops->post_block) + kvm_x86_ops->post_block(vcpu); + if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) return 1; } @@ -6528,10 +6540,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu) for (;;) { if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && - !vcpu->arch.apf.halted) + !vcpu->arch.apf.halted) { r = vcpu_enter_guest(vcpu); - else + } else { r = vcpu_block(kvm, vcpu); + } + if (r <= 0) break; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5c3f4538807f..9596a2f0977b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -234,6 +234,9 @@ struct kvm_vcpu { unsigned long requests; unsigned long guest_debug; + int pre_pcpu; + struct list_head blocked_vcpu_list; + struct mutex mutex; struct kvm_run *run; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index afd7ae6aec65..a75502c93c3e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) init_waitqueue_head(&vcpu->wq); kvm_async_pf_vcpu_init(vcpu); + vcpu->pre_pcpu = -1; + INIT_LIST_HEAD(&vcpu->blocked_vcpu_list); + page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) { r = -ENOMEM; -- cgit v1.2.3-55-g7522 From ba1aefcd6db5536d3eb3ca3ce7bd6786960140ea Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:46 +0300 Subject: kvm/eventfd: factor out kvm_notify_acked_gsi() Factor out kvm_notify_acked_gsi() helper to iterate over EOI listeners and notify those matching the given gsi. It will be reused in the upcoming Hyper-V SynIC implementation. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + virt/kvm/eventfd.c | 16 +++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9596a2f0977b..b66861c297c4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -829,6 +829,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); +void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 518421e65b0d..f6b986a41823 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -451,9 +451,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) } EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) { struct kvm_irq_ack_notifier *kian; + + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); +} + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ int gsi, idx; trace_kvm_ack_irq(irqchip, pin); @@ -461,10 +470,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); + kvm_notify_acked_gsi(kvm, gsi); srcu_read_unlock(&kvm->irq_srcu, idx); } -- cgit v1.2.3-55-g7522 From c9a5eccac1abf50649949f15754a7635f263a1ff Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:47 +0300 Subject: kvm/eventfd: add arch-specific set_irq Allow for arch-specific interrupt types to be set. For that, add kvm_arch_set_irq() which takes interrupt type-specific action if it recognizes the interrupt type given, and -EWOULDBLOCK otherwise. The default implementation always returns -EWOULDBLOCK. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ virt/kvm/eventfd.c | 13 ++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b66861c297c4..eba9caebc9c1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -828,6 +828,10 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); + +int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, + int irq_source_id, int level, bool line_status); + bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f6b986a41823..e29fd2640709 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -171,6 +171,15 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } +int __attribute__((weak)) kvm_arch_set_irq( + struct kvm_kernel_irq_routing_entry *irq, + struct kvm *kvm, int irq_source_id, + int level, + bool line_status) +{ + return -EWOULDBLOCK; +} + /* * Called with wqh->lock held and interrupts disabled */ @@ -195,7 +204,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) if (irq.type == KVM_IRQ_ROUTING_MSI) kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); - else + else if (kvm_arch_set_irq(&irq, kvm, + KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); srcu_read_unlock(&kvm->irq_srcu, idx); } -- cgit v1.2.3-55-g7522 From 3217f7c25bca66eed9b07f0b8bfd1937169b0736 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 27 Aug 2015 16:41:15 +0200 Subject: KVM: Add kvm_arch_vcpu_{un}blocking callbacks Some times it is useful for architecture implementations of KVM to know when the VCPU thread is about to block or when it comes back from blocking (arm/arm64 needs to know this to properly implement timers, for example). Therefore provide a generic architecture callback function in line with what we do elsewhere for KVM generic-arch interactions. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/mips/include/asm/kvm_host.h | 2 ++ arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/s390/include/asm/kvm_host.h | 2 ++ arch/x86/include/asm/kvm_host.h | 3 +++ include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 3 +++ 8 files changed, 20 insertions(+) (limited to 'include') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index c4072d9f32c7..84da97901f1f 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -234,4 +234,7 @@ static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ed039688c221..e4f4d65f7d2b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -255,4 +255,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 5a1a882e0a75..6ded8d347af9 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -847,5 +847,7 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 827a38d7a9db..c9f122d00920 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -718,5 +718,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_exit(void) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 8ced426091e1..72a614c68ed8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -644,5 +644,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2beee0382088..b28f0f142ecb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1233,4 +1233,7 @@ int x86_set_memory_region(struct kvm *kvm, bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1bef9e21e725..4a86f5f072c0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -625,6 +625,8 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8db1d9361993..7873d6daccb1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2018,6 +2018,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) } while (single_task_running() && ktime_before(cur, stop)); } + kvm_arch_vcpu_blocking(vcpu); + for (;;) { prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); @@ -2031,6 +2033,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) finish_wait(&vcpu->wq, &wait); cur = ktime_get(); + kvm_arch_vcpu_unblocking(vcpu); out: block_ns = ktime_to_ns(cur) - ktime_to_ns(start); -- cgit v1.2.3-55-g7522 From d35268da66870d733ae763fd7f9b06a1f63f395e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 25 Aug 2015 19:48:21 +0200 Subject: arm/arm64: KVM: arch_timer: Only schedule soft timer on vcpu_block We currently schedule a soft timer every time we exit the guest if the timer did not expire while running the guest. This is really not necessary, because the only work we do in the timer work function is to kick the vcpu. Kicking the vcpu does two things: (1) If the vpcu thread is on a waitqueue, make it runnable and remove it from the waitqueue. (2) If the vcpu is running on a different physical CPU from the one doing the kick, it sends a reschedule IPI. The second case cannot happen, because the soft timer is only ever scheduled when the vcpu is not running. The first case is only relevant when the vcpu thread is on a waitqueue, which is only the case when the vcpu thread has called kvm_vcpu_block(). Therefore, we only need to make sure a timer is scheduled for kvm_vcpu_block(), which we do by encapsulating all calls to kvm_vcpu_block() with kvm_timer_{un}schedule calls. Additionally, we only schedule a soft timer if the timer is enabled and unmasked, since it is useless otherwise. Note that theoretically userspace can use the SET_ONE_REG interface to change registers that should cause the timer to fire, even if the vcpu is blocked without a scheduled timer, but this case was not supported before this patch and we leave it for future work for now. Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 3 -- arch/arm/kvm/arm.c | 10 +++++ arch/arm64/include/asm/kvm_host.h | 3 -- include/kvm/arm_arch_timer.h | 2 + virt/kvm/arm/arch_timer.c | 94 +++++++++++++++++++++++++-------------- 5 files changed, 72 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 84da97901f1f..c4072d9f32c7 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -234,7 +234,4 @@ static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} - #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 78b286994577..7ed4d475d83a 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -271,6 +271,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvm_timer_should_fire(vcpu); } +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) +{ + kvm_timer_schedule(vcpu); +} + +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) +{ + kvm_timer_unschedule(vcpu); +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { /* Force users to call KVM_ARM_VCPU_INIT */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e4f4d65f7d2b..ed039688c221 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -255,7 +255,4 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); -static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} - #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index e1e4d7c38dda..ef14cc1f1f26 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -71,5 +71,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); +void kvm_timer_schedule(struct kvm_vcpu *vcpu); +void kvm_timer_unschedule(struct kvm_vcpu *vcpu); #endif diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index b9d3a32cbc04..32095fbb5d7c 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -111,14 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } +static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && + (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) && + !kvm_vgic_get_phys_irq_active(timer->map); +} + bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; cycle_t cval, now; - if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) || - kvm_vgic_get_phys_irq_active(timer->map)) + if (!kvm_timer_irq_can_fire(vcpu)) return false; cval = timer->cntv_cval; @@ -127,12 +134,57 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) return cval <= now; } +/* + * Schedule the background timer before calling kvm_vcpu_block, so that this + * thread is removed from its waitqueue and made runnable when there's a timer + * interrupt to handle. + */ +void kvm_timer_schedule(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 ns; + cycle_t cval, now; + + BUG_ON(timer_is_armed(timer)); + + /* + * No need to schedule a background timer if the guest timer has + * already expired, because kvm_vcpu_block will return before putting + * the thread to sleep. + */ + if (kvm_timer_should_fire(vcpu)) + return; + + /* + * If the timer is not capable of raising interrupts (disabled or + * masked), then there's no more work for us to do. + */ + if (!kvm_timer_irq_can_fire(vcpu)) + return; + + /* The timer has not yet expired, schedule a background timer */ + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + ns = cyclecounter_cyc2ns(timecounter->cc, + cval - now, + timecounter->mask, + &timecounter->frac); + timer_arm(timer, ns); +} + +void kvm_timer_unschedule(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + timer_disarm(timer); +} + /** * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu * @vcpu: The vcpu pointer * - * Disarm any pending soft timers, since the world-switch code will write the - * virtual timer state back to the physical CPU. + * Check if the virtual timer has expired while we were running in the host, + * and inject an interrupt if that was the case. */ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) { @@ -140,17 +192,6 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) bool phys_active; int ret; - /* - * We're about to run this vcpu again, so there is no need to - * keep the background timer running, as we're about to - * populate the CPU timer again. - */ - timer_disarm(timer); - - /* - * If the timer expired while we were not scheduled, now is the time - * to inject it. - */ if (kvm_timer_should_fire(vcpu)) kvm_timer_inject_irq(vcpu); @@ -176,32 +217,17 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) * kvm_timer_sync_hwstate - sync timer state from cpu * @vcpu: The vcpu pointer * - * Check if the virtual timer was armed and either schedule a corresponding - * soft timer or inject directly if already expired. + * Check if the virtual timer has expired while we were running in the guest, + * and inject an interrupt if that was the case. */ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - cycle_t cval, now; - u64 ns; BUG_ON(timer_is_armed(timer)); - if (kvm_timer_should_fire(vcpu)) { - /* - * Timer has already expired while we were not - * looking. Inject the interrupt and carry on. - */ + if (kvm_timer_should_fire(vcpu)) kvm_timer_inject_irq(vcpu); - return; - } - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, - &timecounter->frac); - timer_arm(timer, ns); } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, -- cgit v1.2.3-55-g7522 From 4b4b4512da2a844b8da2585609b67fae1ce4f4db Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 30 Aug 2015 15:01:27 +0200 Subject: arm/arm64: KVM: Rework the arch timer to use level-triggered semantics The arch timer currently uses edge-triggered semantics in the sense that the line is never sampled by the vgic and lowering the line from the timer to the vgic doesn't have any effect on the pending state of virtual interrupts in the vgic. This means that we do not support a guest with the otherwise valid behavior of (1) disable interrupts (2) enable the timer (3) disable the timer (4) enable interrupts. Such a guest would validly not expect to see any interrupts on real hardware, but will see interrupts on KVM. This patch fixes this shortcoming through the following series of changes. First, we change the flow of the timer/vgic sync/flush operations. Now the timer is always flushed/synced before the vgic, because the vgic samples the state of the timer output. This has the implication that we move the timer operations in to non-preempible sections, but that is fine after the previous commit getting rid of hrtimer schedules on every entry/exit. Second, we change the internal behavior of the timer, letting the timer keep track of its previous output state, and only lower/raise the line to the vgic when the state changes. Note that in theory this could have been accomplished more simply by signalling the vgic every time the state *potentially* changed, but we don't want to be hitting the vgic more often than necessary. Third, we get rid of the use of the map->active field in the vgic and instead simply set the interrupt as active on the physical distributor whenever the input to the GIC is asserted and conversely clear the physical active state when the input to the GIC is deasserted. Fourth, and finally, we now initialize the timer PPIs (and all the other unused PPIs for now), to be level-triggered, and modify the sync code to sample the line state on HW sync and re-inject a new interrupt if it is still pending at that time. Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 11 +++-- include/kvm/arm_arch_timer.h | 2 +- include/kvm/arm_vgic.h | 3 -- virt/kvm/arm/arch_timer.c | 81 +++++++++++++++++++++++----------- virt/kvm/arm/vgic.c | 102 +++++++++++-------------------------------- 5 files changed, 91 insertions(+), 108 deletions(-) (limited to 'include') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 7ed4d475d83a..59125f48c707 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { local_irq_enable(); + kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); preempt_enable(); - kvm_timer_sync_hwstate(vcpu); continue; } @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_guest_exit(); trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + /* + * We must sync the timer state before the vgic state so that + * the vgic can properly sample the updated state of the + * interrupt line. + */ + kvm_timer_sync_hwstate(vcpu); + kvm_vgic_sync_hwstate(vcpu); preempt_enable(); - kvm_timer_sync_hwstate(vcpu); - ret = handle_exit(vcpu, run, ret); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index ef14cc1f1f26..1800227af9d6 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -51,7 +51,7 @@ struct arch_timer_cpu { bool armed; /* Timer IRQ */ - const struct kvm_irq_level *irq; + struct kvm_irq_level irq; /* VGIC mapping */ struct irq_phys_map *map; diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4e14dac282bb..7bc5d0224ab0 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -159,7 +159,6 @@ struct irq_phys_map { u32 virt_irq; u32 phys_irq; u32 irq; - bool active; }; struct irq_phys_map_entry { @@ -354,8 +353,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int irq); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map); -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 32095fbb5d7c..523816d8c402 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -59,18 +59,6 @@ static void timer_disarm(struct arch_timer_cpu *timer) } } -static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) -{ - int ret; - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - kvm_vgic_set_phys_irq_active(timer->map, true); - ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, - timer->map, - timer->irq->level); - WARN_ON(ret); -} - static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) { struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; @@ -116,8 +104,7 @@ static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && - (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) && - !kvm_vgic_get_phys_irq_active(timer->map); + (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE); } bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) @@ -134,6 +121,41 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) return cval <= now; } +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) +{ + int ret; + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + BUG_ON(!vgic_initialized(vcpu->kvm)); + + timer->irq.level = new_level; + ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, + timer->map, + timer->irq.level); + WARN_ON(ret); +} + +/* + * Check if there was a change in the timer state (should we raise or lower + * the line level to the GIC). + */ +static void kvm_timer_update_state(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + /* + * If userspace modified the timer registers via SET_ONE_REG before + * the vgic was initialized, we mustn't set the timer->irq.level value + * because the guest would never see the interrupt. Instead wait + * until we call this function from kvm_timer_flush_hwstate. + */ + if (!vgic_initialized(vcpu->kvm)) + return; + + if (kvm_timer_should_fire(vcpu) != timer->irq.level) + kvm_timer_update_irq(vcpu, !timer->irq.level); +} + /* * Schedule the background timer before calling kvm_vcpu_block, so that this * thread is removed from its waitqueue and made runnable when there's a timer @@ -192,17 +214,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) bool phys_active; int ret; - if (kvm_timer_should_fire(vcpu)) - kvm_timer_inject_irq(vcpu); + kvm_timer_update_state(vcpu); /* - * We keep track of whether the edge-triggered interrupt has been - * signalled to the vgic/guest, and if so, we mask the interrupt and - * the physical distributor to prevent the timer from raising a - * physical interrupt whenever we run a guest, preventing forward - * VCPU progress. + * If we enter the guest with the virtual input level to the VGIC + * asserted, then we have already told the VGIC what we need to, and + * we don't need to exit from the guest until the guest deactivates + * the already injected interrupt, so therefore we should set the + * hardware active state to prevent unnecessary exits from the guest. + * + * Conversely, if the virtual input level is deasserted, then always + * clear the hardware active state to ensure that hardware interrupts + * from the timer triggers a guest exit. */ - if (kvm_vgic_get_phys_irq_active(timer->map)) + if (timer->irq.level) phys_active = true; else phys_active = false; @@ -226,8 +251,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) BUG_ON(timer_is_armed(timer)); - if (kvm_timer_should_fire(vcpu)) - kvm_timer_inject_irq(vcpu); + /* + * The guest could have modified the timer registers or the timer + * could have expired, update the timer state. + */ + kvm_timer_update_state(vcpu); } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, @@ -242,7 +270,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * kvm_vcpu_set_target(). To handle this, we determine * vcpu timer irq number when the vcpu is reset. */ - timer->irq = irq; + timer->irq.irq = irq->irq; /* * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 @@ -251,6 +279,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * the ARMv7 architecture. */ timer->cntv_ctl = 0; + kvm_timer_update_state(vcpu); /* * Tell the VGIC that the virtual interrupt is tied to a @@ -295,6 +324,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) default: return -1; } + + kvm_timer_update_state(vcpu); return 0; } diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index a44ecf9eca4e..3c2909c1bda3 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -537,34 +537,6 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, return false; } -/* - * If a mapped interrupt's state has been modified by the guest such that it - * is no longer active or pending, without it have gone through the sync path, - * then the map->active field must be cleared so the interrupt can be taken - * again. - */ -static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct list_head *root; - struct irq_phys_map_entry *entry; - struct irq_phys_map *map; - - rcu_read_lock(); - - /* Check for PPIs */ - root = &vgic_cpu->irq_phys_map_list; - list_for_each_entry_rcu(entry, root, entry) { - map = &entry->map; - - if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) && - !vgic_irq_is_active(vcpu, map->virt_irq)) - map->active = false; - } - - rcu_read_unlock(); -} - bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id) @@ -595,7 +567,6 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, vcpu_id, offset); vgic_reg_access(mmio, reg, offset, mode); - vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); vgic_update_state(kvm); return true; } @@ -633,7 +604,6 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); if (mmio->is_write) { - vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); vgic_update_state(kvm); return true; } @@ -1443,29 +1413,37 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) /* * Save the physical active state, and reset it to inactive. * - * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. + * Return true if there's a pending level triggered interrupt line to queue. */ -static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) +static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct irq_phys_map *map; + bool phys_active; + bool level_pending; int ret; if (!(vlr.state & LR_HW)) - return 0; + return false; map = vgic_irq_map_search(vcpu, vlr.irq); BUG_ON(!map); ret = irq_get_irqchip_state(map->irq, IRQCHIP_STATE_ACTIVE, - &map->active); + &phys_active); WARN_ON(ret); - if (map->active) + if (phys_active) return 0; - return 1; + /* Mapped edge-triggered interrupts not yet supported. */ + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); + spin_lock(&dist->lock); + level_pending = process_level_irq(vcpu, lr, vlr); + spin_unlock(&dist->lock); + return level_pending; } /* Sync back the VGIC state after a guest run */ @@ -1490,18 +1468,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) continue; vlr = vgic_get_lr(vcpu, lr); - if (vgic_sync_hwirq(vcpu, vlr)) { - /* - * So this is a HW interrupt that the guest - * EOI-ed. Clean the LR state and allow the - * interrupt to be sampled again. - */ - vlr.state = 0; - vlr.hwirq = 0; - vgic_set_lr(vcpu, lr, vlr); - vgic_irq_clear_queued(vcpu, vlr.irq); - set_bit(lr, elrsr_ptr); - } + if (vgic_sync_hwirq(vcpu, lr, vlr)) + level_pending = true; if (!test_bit(lr, elrsr_ptr)) continue; @@ -1880,30 +1848,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu) kfree(entry); } -/** - * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ - * - * Return the logical active state of a mapped interrupt. This doesn't - * necessarily reflects the current HW state. - */ -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map) -{ - BUG_ON(!map); - return map->active; -} - -/** - * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ - * - * Set the logical active state of a mapped interrupt. This doesn't - * immediately affects the HW state. - */ -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active) -{ - BUG_ON(!map); - map->active = active; -} - /** * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping * @vcpu: The VCPU pointer @@ -2129,17 +2073,23 @@ int vgic_init(struct kvm *kvm) } /* - * Enable all SGIs and configure all private IRQs as - * edge-triggered. + * Enable and configure all SGIs to be edge-triggere and + * configure all PPIs as level-triggered. */ for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - if (i < VGIC_NR_SGIS) + if (i < VGIC_NR_SGIS) { + /* SGIs */ vgic_bitmap_set_irq_val(&dist->irq_enabled, vcpu->vcpu_id, i, 1); - if (i < VGIC_NR_PRIVATE_IRQS) vgic_bitmap_set_irq_val(&dist->irq_cfg, vcpu->vcpu_id, i, VGIC_CFG_EDGE); + } else if (i < VGIC_NR_PRIVATE_IRQS) { + /* PPIs */ + vgic_bitmap_set_irq_val(&dist->irq_cfg, + vcpu->vcpu_id, i, + VGIC_CFG_LEVEL); + } } vgic_enable(vcpu); -- cgit v1.2.3-55-g7522 From 5fdf876d30ce7c9b63045b3db8374912ab9b262c Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 15 Oct 2015 22:16:28 +0200 Subject: KVM: arm: Do not indent the arguments of DECLARE_BITMAP Besides being a coding style issue, it confuses make tags: ctags: Warning: include/kvm/arm_vgic.h:307: null expansion of name pattern "\1" ctags: Warning: include/kvm/arm_vgic.h:308: null expansion of name pattern "\1" ctags: Warning: include/kvm/arm_vgic.h:309: null expansion of name pattern "\1" ctags: Warning: include/kvm/arm_vgic.h:317: null expansion of name pattern "\1" Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Michal Marek Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7bc5d0224ab0..8065801a1847 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -299,9 +299,9 @@ struct vgic_cpu { u8 *vgic_irq_lr_map; /* Pending/active/both interrupts on this VCPU */ - DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP(pend_act_percpu, VGIC_NR_PRIVATE_IRQS); /* Pending/active/both shared interrupts, dynamically sized */ unsigned long *pending_shared; @@ -309,7 +309,7 @@ struct vgic_cpu { unsigned long *pend_act_shared; /* Bitmap of used/free list registers */ - DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); + DECLARE_BITMAP(lr_used, VGIC_V2_MAX_LRS); /* Number of list registers on this CPU */ int nr_lr; -- cgit v1.2.3-55-g7522 From c4cd4c168b81dad53e659d18cdae653bc0ec2384 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Tue, 27 Oct 2015 11:37:29 +0300 Subject: KVM: arm/arm64: Optimize away redundant LR tracking Currently we use vgic_irq_lr_map in order to track which LRs hold which IRQs, and lr_used bitmap in order to track which LRs are used or free. vgic_irq_lr_map is actually used only for piggy-back optimization, and can be easily replaced by iteration over lr_used. This is good because in future, when LPI support is introduced, number of IRQs will grow up to at least 16384, while numbers from 1024 to 8192 are never going to be used. This would be a huge memory waste. In its turn, lr_used is also completely redundant since ae705930fca6322600690df9dc1c7d0516145a93 ("arm/arm64: KVM: Keep elrsr/aisr in sync with software model"), because together with lr_used we also update elrsr. This allows to easily replace lr_used with elrsr, inverting all conditions (because in elrsr '1' means 'free'). Signed-off-by: Pavel Fedin Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 6 ------ virt/kvm/arm/vgic-v2.c | 1 + virt/kvm/arm/vgic-v3.c | 1 + virt/kvm/arm/vgic.c | 53 ++++++++++++++------------------------------------ 4 files changed, 17 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 8065801a1847..3936bf802e1d 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -295,9 +295,6 @@ struct vgic_v3_cpu_if { }; struct vgic_cpu { - /* per IRQ to LR mapping */ - u8 *vgic_irq_lr_map; - /* Pending/active/both interrupts on this VCPU */ DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS); DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS); @@ -308,9 +305,6 @@ struct vgic_cpu { unsigned long *active_shared; unsigned long *pend_act_shared; - /* Bitmap of used/free list registers */ - DECLARE_BITMAP(lr_used, VGIC_V2_MAX_LRS); - /* Number of list registers on this CPU */ int nr_lr; diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 8d7b04db8471..c0f5d7fad9ea 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -158,6 +158,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu) * anyway. */ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; /* Get the show on the road... */ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 7dd5d62f10a1..92003cb61a0a 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -193,6 +193,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) * anyway. */ vgic_v3->vgic_vmcr = 0; + vgic_v3->vgic_elrsr = ~0; /* * If we are emulating a GICv3, we do it in an non-GICv2-compatible diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index d4669eb29b77..265a41035728 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -108,6 +108,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); +static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu); static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, int virt_irq); static int compute_pending_for_cpu(struct kvm_vcpu *vcpu); @@ -691,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 elrsr = vgic_get_elrsr(vcpu); + unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); int i; - for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { + for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) { struct vgic_lr lr = vgic_get_lr(vcpu, i); /* @@ -1098,7 +1101,6 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu) static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); /* @@ -1112,8 +1114,6 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) vlr.state = 0; vgic_set_lr(vcpu, lr_nr, vlr); - clear_bit(lr_nr, vgic_cpu->lr_used); - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } @@ -1128,10 +1128,11 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) */ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 elrsr = vgic_get_elrsr(vcpu); + unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); int lr; - for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { + for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { @@ -1188,8 +1189,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, */ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u64 elrsr = vgic_get_elrsr(vcpu); + unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); struct vgic_lr vlr; int lr; @@ -1200,28 +1202,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) kvm_debug("Queue IRQ%d\n", irq); - lr = vgic_cpu->vgic_irq_lr_map[irq]; - /* Do we have an active interrupt for the same CPUID? */ - if (lr != LR_EMPTY) { + for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { vlr = vgic_get_lr(vcpu, lr); - if (vlr.source == sgi_source_id) { + if (vlr.irq == irq && vlr.source == sgi_source_id) { kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); - BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); return true; } } /* Try to use another LR for this interrupt */ - lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic->nr_lr); + lr = find_first_bit(elrsr_ptr, vgic->nr_lr); if (lr >= vgic->nr_lr) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_irq_lr_map[irq] = lr; - set_bit(lr, vgic_cpu->lr_used); vlr.irq = irq; vlr.source = sgi_source_id; @@ -1456,7 +1452,6 @@ static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) /* Sync back the VGIC state after a guest run */ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; u64 elrsr; unsigned long *elrsr_ptr; @@ -1469,22 +1464,10 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) /* Deal with HW interrupts, and clear mappings for empty LRs */ for (lr = 0; lr < vgic->nr_lr; lr++) { - struct vgic_lr vlr; - - if (!test_bit(lr, vgic_cpu->lr_used)) - continue; - - vlr = vgic_get_lr(vcpu, lr); - if (vgic_sync_hwirq(vcpu, lr, vlr)) - level_pending = true; - - if (!test_bit(lr, elrsr_ptr)) - continue; - - clear_bit(lr, vgic_cpu->lr_used); + struct vgic_lr vlr = vgic_get_lr(vcpu, lr); + level_pending |= vgic_sync_hwirq(vcpu, lr, vlr); BUG_ON(vlr.irq >= dist->nr_irqs); - vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; } /* Check if we still have something up our sleeve... */ @@ -1912,12 +1895,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) kfree(vgic_cpu->pending_shared); kfree(vgic_cpu->active_shared); kfree(vgic_cpu->pend_act_shared); - kfree(vgic_cpu->vgic_irq_lr_map); vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); vgic_cpu->pending_shared = NULL; vgic_cpu->active_shared = NULL; vgic_cpu->pend_act_shared = NULL; - vgic_cpu->vgic_irq_lr_map = NULL; } static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) @@ -1928,18 +1909,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); - vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); if (!vgic_cpu->pending_shared || !vgic_cpu->active_shared - || !vgic_cpu->pend_act_shared - || !vgic_cpu->vgic_irq_lr_map) { + || !vgic_cpu->pend_act_shared) { kvm_vgic_vcpu_destroy(vcpu); return -ENOMEM; } - memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs); - /* * Store the number of LRs per vcpu, so we don't have to go * all the way to the distributor structure to find out. Only -- cgit v1.2.3-55-g7522 From 26caea7693cb99833fe4ecc544c842289d6b3f69 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Tue, 27 Oct 2015 11:37:31 +0300 Subject: KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr() Now we see that vgic_set_lr() and vgic_sync_lr_elrsr() are always used together. Merge them into one function, saving from second vgic_ops dereferencing every time. Signed-off-by: Pavel Fedin Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 1 - virt/kvm/arm/vgic-v2.c | 5 ----- virt/kvm/arm/vgic-v3.c | 5 ----- virt/kvm/arm/vgic.c | 14 ++------------ 4 files changed, 2 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 3936bf802e1d..f62addc17dcf 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -112,7 +112,6 @@ struct vgic_vmcr { struct vgic_ops { struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); - void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); u64 (*get_eisr)(const struct kvm_vcpu *vcpu); void (*clear_eisr)(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index c0f5d7fad9ea..ff02f08df74d 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; -} -static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ if (!(lr_desc.state & LR_STATE_MASK)) vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); else @@ -167,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu) static const struct vgic_ops vgic_v2_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, - .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, .clear_eisr = vgic_v2_clear_eisr, diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 92003cb61a0a..487d6357b7e7 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, } vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; -} -static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ if (!(lr_desc.state & LR_STATE_MASK)) vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); else @@ -212,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) static const struct vgic_ops vgic_v3_ops = { .get_lr = vgic_v3_get_lr, .set_lr = vgic_v3_set_lr, - .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, .get_elrsr = vgic_v3_get_elrsr, .get_eisr = vgic_v3_get_eisr, .clear_eisr = vgic_v3_clear_eisr, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 96e45f3da534..fe451d4885ae 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1032,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, vgic_ops->set_lr(vcpu, lr, vlr); } -static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr vlr) -{ - vgic_ops->sync_lr_elrsr(vcpu, lr, vlr); -} - static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) { return vgic_ops->get_elrsr(vcpu); @@ -1100,7 +1094,6 @@ static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu) vlr.state = 0; vgic_set_lr(vcpu, lr_nr, vlr); - vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } /* @@ -1162,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, } vgic_set_lr(vcpu, lr_nr, vlr); - vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } /* @@ -1340,8 +1332,6 @@ static int process_queued_irq(struct kvm_vcpu *vcpu, vlr.hwirq = 0; vgic_set_lr(vcpu, lr, vlr); - vgic_sync_lr_elrsr(vcpu, lr, vlr); - return pending; } @@ -1442,8 +1432,6 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) bool level_pending; level_pending = vgic_process_maintenance(vcpu); - elrsr = vgic_get_elrsr(vcpu); - elrsr_ptr = u64_to_bitmask(&elrsr); /* Deal with HW interrupts, and clear mappings for empty LRs */ for (lr = 0; lr < vgic->nr_lr; lr++) { @@ -1454,6 +1442,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ + elrsr = vgic_get_elrsr(vcpu); + elrsr_ptr = u64_to_bitmask(&elrsr); pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); if (level_pending || pending < vgic->nr_lr) set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); -- cgit v1.2.3-55-g7522 From c75efa974e013640496620f26f0b532cb5cb17f9 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:50 +0300 Subject: drivers/hv: share Hyper-V SynIC constants with userspace Moved Hyper-V synic contants from guest Hyper-V drivers private header into x86 arch uapi Hyper-V header. Added Hyper-V synic msr's flags into x86 arch uapi Hyper-V header. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/hyperv.h | 12 ++++++++++++ drivers/hv/hyperv_vmbus.h | 5 ----- include/linux/hyperv.h | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 2677a0aac2cc..040d4083c24f 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -257,4 +257,16 @@ typedef struct _HV_REFERENCE_TSC_PAGE { __s64 tsc_offset; } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; +/* Define the number of synthetic interrupt sources. */ +#define HV_SYNIC_SINT_COUNT (16) +/* Define the expected SynIC version. */ +#define HV_SYNIC_VERSION_1 (0x1) + +#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) +#define HV_SYNIC_SIMP_ENABLE (1ULL << 0) +#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) +#define HV_SYNIC_SINT_MASKED (1ULL << 16) +#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) +#define HV_SYNIC_SINT_VECTOR_MASK (0xFF) + #endif diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 3d70e36c918e..3782636562a1 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -63,9 +63,6 @@ enum hv_cpuid_function { /* Define version of the synthetic interrupt controller. */ #define HV_SYNIC_VERSION (1) -/* Define the expected SynIC version. */ -#define HV_SYNIC_VERSION_1 (0x1) - /* Define synthetic interrupt controller message constants. */ #define HV_MESSAGE_SIZE (256) #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) @@ -105,8 +102,6 @@ enum hv_message_type { HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 }; -/* Define the number of synthetic interrupt sources. */ -#define HV_SYNIC_SINT_COUNT (16) #define HV_SYNIC_STIMER_COUNT (4) /* Define invalid partition identifier. */ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 54733d5b503e..8fdc17b84739 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -26,6 +26,7 @@ #define _HYPERV_H #include +#include #include #include -- cgit v1.2.3-55-g7522 From b97e6de9c96cefaa02a6a7464731ea504b45e150 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 19:16:47 +0100 Subject: KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic We do not want to do too much work in atomic context, in particular not walking all the VCPUs of the virtual machine. So we want to distinguish the architecture-specific injection function for irqfd from kvm_set_msi. Since it's still empty, reuse the newly added kvm_arch_set_irq and rename it to kvm_arch_set_irq_inatomic. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq_comm.c | 14 ++++++++------ include/linux/kvm_host.h | 7 +++---- virt/kvm/eventfd.c | 11 ++++------- 3 files changed, 15 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 8f4499c7ffc1..75dc633c48dc 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -124,12 +124,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, } -static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm) +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) { struct kvm_lapic_irq irq; int r; + if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) + return -EWOULDBLOCK; + kvm_set_msi_irq(e, &irq); if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) @@ -165,10 +169,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) idx = srcu_read_lock(&kvm->irq_srcu); if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { e = &entries[0]; - if (likely(e->type == KVM_IRQ_ROUTING_MSI)) - ret = kvm_set_msi_inatomic(e, kvm); - else - ret = -EWOULDBLOCK; + ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id, + irq, level); } srcu_read_unlock(&kvm->irq_srcu, idx); return ret; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 87189a41d904..15c78f320678 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -830,10 +830,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); - -int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, - int irq_source_id, int level, bool line_status); - +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index e29fd2640709..46dbc0a7dfc1 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -171,7 +171,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } -int __attribute__((weak)) kvm_arch_set_irq( +int __attribute__((weak)) kvm_arch_set_irq_inatomic( struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, int irq_source_id, int level, @@ -201,12 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) irq = irqfd->irq_entry; } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); /* An event has been signaled, inject an interrupt */ - if (irq.type == KVM_IRQ_ROUTING_MSI) - kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, - false); - else if (kvm_arch_set_irq(&irq, kvm, - KVM_USERSPACE_IRQ_SOURCE_ID, 1, - false) == -EWOULDBLOCK) + if (kvm_arch_set_irq_inatomic(&irq, kvm, + KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); srcu_read_unlock(&kvm->irq_srcu, idx); } -- cgit v1.2.3-55-g7522 From 8a22f234a81ab4d1de5d948c3478608f08a9b844 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 18:52:02 +0100 Subject: KVM: x86: move kvm_set_irq_inatomic to legacy device assignment The function is not used outside device assignment, and kvm_arch_set_irq_inatomic has a different prototype. Move it here and make it static to avoid confusion. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/assigned-dev.c | 37 +++++++++++++++++++++++++++++++++++++ arch/x86/kvm/irq_comm.c | 34 ---------------------------------- include/linux/kvm_host.h | 1 - 3 files changed, 37 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index 1c17ee807ef7..9dc091acd5fb 100644 --- a/arch/x86/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c @@ -21,6 +21,7 @@ #include #include "irq.h" #include "assigned-dev.h" +#include "trace/events/kvm.h" struct kvm_assigned_dev_kernel { struct kvm_irq_ack_notifier ack_notifier; @@ -131,6 +132,42 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) return IRQ_HANDLED; } +/* + * Deliver an IRQ in an atomic context if we can, or return a failure, + * user can retry in a process context. + * Return value: + * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. + * Other values - No need to retry. + */ +static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, + int level) +{ + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; + struct kvm_kernel_irq_routing_entry *e; + int ret = -EINVAL; + int idx; + + trace_kvm_set_irq(irq, level, irq_source_id); + + /* + * Injection into either PIC or IOAPIC might need to scan all CPUs, + * which would need to be retried from thread context; when same GSI + * is connected to both PIC and IOAPIC, we'd have to report a + * partial failure here. + * Since there's no easy way to do this, we only support injecting MSI + * which is limited to 1:1 GSI mapping. + */ + idx = srcu_read_lock(&kvm->irq_srcu); + if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { + e = &entries[0]; + ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id, + irq, level); + } + srcu_read_unlock(&kvm->irq_srcu, idx); + return ret; +} + + static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = dev_id; diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 75dc633c48dc..84b96d319909 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -142,40 +142,6 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, return -EWOULDBLOCK; } -/* - * Deliver an IRQ in an atomic context if we can, or return a failure, - * user can retry in a process context. - * Return value: - * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. - * Other values - No need to retry. - */ -int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) -{ - struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; - struct kvm_kernel_irq_routing_entry *e; - int ret = -EINVAL; - int idx; - - trace_kvm_set_irq(irq, level, irq_source_id); - - /* - * Injection into either PIC or IOAPIC might need to scan all CPUs, - * which would need to be retried from thread context; when same GSI - * is connected to both PIC and IOAPIC, we'd have to report a - * partial failure here. - * Since there's no easy way to do this, we only support injecting MSI - * which is limited to 1:1 GSI mapping. - */ - idx = srcu_read_lock(&kvm->irq_srcu); - if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { - e = &entries[0]; - ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id, - irq, level); - } - srcu_read_unlock(&kvm->irq_srcu, idx); - return ret; -} - int kvm_request_irq_source_id(struct kvm *kvm) { unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15c78f320678..242a6d2b53ff 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -827,7 +827,6 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); -int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, -- cgit v1.2.3-55-g7522