From 81aa8efe0190cf5bf7eaafb57341cd7d0aea96cd Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Mon, 7 Oct 2013 16:13:44 +0200 Subject: KVM: s390: add and extend interrupt information data structs With the currently available struct kvm_s390_interrupt it is not possible to inject every kind of interrupt as defined in the z/Architecture. Add additional interruption parameters to the structures and move it to kvm.h Signed-off-by: Jens Freimann Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index eef3dd3fd9a9..3ffc9646e742 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -168,18 +169,6 @@ struct kvm_vcpu_stat { u32 diagnose_9c; }; -struct kvm_s390_io_info { - __u16 subchannel_id; /* 0x0b8 */ - __u16 subchannel_nr; /* 0x0ba */ - __u32 io_int_parm; /* 0x0bc */ - __u32 io_int_word; /* 0x0c0 */ -}; - -struct kvm_s390_ext_info { - __u32 ext_params; - __u64 ext_params2; -}; - #define PGM_OPERATION 0x01 #define PGM_PRIVILEGED_OP 0x02 #define PGM_EXECUTE 0x03 @@ -188,27 +177,6 @@ struct kvm_s390_ext_info { #define PGM_SPECIFICATION 0x06 #define PGM_DATA 0x07 -struct kvm_s390_pgm_info { - __u16 code; -}; - -struct kvm_s390_prefix_info { - __u32 address; -}; - -struct kvm_s390_extcall_info { - __u16 code; -}; - -struct kvm_s390_emerg_info { - __u16 code; -}; - -struct kvm_s390_mchk_info { - __u64 cr14; - __u64 mcic; -}; - struct kvm_s390_interrupt_info { struct list_head list; u64 type; -- cgit v1.2.3-55-g7522 From c05c4186bbe4e99d64e8a36f7ca7f480da5d109f Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Mon, 7 Oct 2013 16:13:45 +0200 Subject: KVM: s390: add floating irq controller This patch adds a floating irq controller as a kvm_device. It will be necessary for migration of floating interrupts as well as for hardening the reset code by allowing user space to explicitly remove all pending floating interrupts. Signed-off-by: Jens Freimann Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/devices/s390_flic.txt | 36 +++ arch/s390/include/asm/kvm_host.h | 1 + arch/s390/include/uapi/asm/kvm.h | 14 ++ arch/s390/kvm/interrupt.c | 304 ++++++++++++++++++++---- arch/s390/kvm/kvm-s390.c | 1 + include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 1 + virt/kvm/kvm_main.c | 5 + 8 files changed, 312 insertions(+), 51 deletions(-) create mode 100644 Documentation/virtual/kvm/devices/s390_flic.txt (limited to 'arch/s390') diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt new file mode 100644 index 000000000000..6b557953066a --- /dev/null +++ b/Documentation/virtual/kvm/devices/s390_flic.txt @@ -0,0 +1,36 @@ +FLIC (floating interrupt controller) +==================================== + +FLIC handles floating (non per-cpu) interrupts, i.e. I/O, service and some +machine check interruptions. All interrupts are stored in a per-vm list of +pending interrupts. FLIC performs operations on this list. + +Only one FLIC instance may be instantiated. + +FLIC provides support to +- add interrupts (KVM_DEV_FLIC_ENQUEUE) +- inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS) +- purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS) + +Groups: + KVM_DEV_FLIC_ENQUEUE + Passes a buffer and length into the kernel which are then injected into + the list of pending interrupts. + attr->addr contains the pointer to the buffer and attr->attr contains + the length of the buffer. + The format of the data structure kvm_s390_irq as it is copied from userspace + is defined in usr/include/linux/kvm.h. + + KVM_DEV_FLIC_GET_ALL_IRQS + Copies all floating interrupts into a buffer provided by userspace. + When the buffer is too small it returns -ENOMEM, which is the indication + for userspace to try again with a bigger buffer. + All interrupts remain pending, i.e. are not deleted from the list of + currently pending interrupts. + attr->addr contains the userspace address of the buffer into which all + interrupt data will be copied. + attr->attr contains the size of the buffer in bytes. + + KVM_DEV_FLIC_CLEAR_IRQS + Simply deletes all elements from the list of currently pending floating + interrupts. No interrupts are injected into the guest. diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3ffc9646e742..59635b5c59a6 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -243,6 +243,7 @@ struct kvm_arch{ struct sca_block *sca; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; + struct kvm_device *flic; struct gmap *gmap; int css_support; }; diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index d25da598ec62..38d5f98552bb 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -16,6 +16,20 @@ #define __KVM_S390 +/* Device control API: s390-specific devices */ +#define KVM_DEV_FLIC_GET_ALL_IRQS 1 +#define KVM_DEV_FLIC_ENQUEUE 2 +#define KVM_DEV_FLIC_CLEAR_IRQS 3 +/* + * We can have up to 4*64k pending subchannels + 8 adapter interrupts, + * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. + * There are also sclp and machine checks. This gives us + * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000 + * Lets round up to 8192 pages. + */ + +#define KVM_S390_FLIC_MAX_BUFFER 0x2000000 + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* general purpose regs for s390 */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5f79d2d79ca7..a5f18babed4c 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -659,53 +659,86 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, return inti; } -int kvm_s390_inject_vm(struct kvm *kvm, - struct kvm_s390_interrupt *s390int) +static void __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { struct kvm_s390_local_interrupt *li; struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *inti, *iter; + struct kvm_s390_interrupt_info *iter; int sigcpu; + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + if (!is_ioint(inti->type)) { + list_add_tail(&inti->list, &fi->list); + } else { + u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); + + /* Keep I/O interrupts sorted in isc order. */ + list_for_each_entry(iter, &fi->list, list) { + if (!is_ioint(iter->type)) + continue; + if (int_word_to_isc_bits(iter->io.io_int_word) + <= isc_bits) + continue; + break; + } + list_add_tail(&inti->list, &iter->list); + } + atomic_set(&fi->active, 1); + sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); + if (sigcpu == KVM_MAX_VCPUS) { + do { + sigcpu = fi->next_rr_cpu++; + if (sigcpu == KVM_MAX_VCPUS) + sigcpu = fi->next_rr_cpu = 0; + } while (fi->local_int[sigcpu] == NULL); + } + li = fi->local_int[sigcpu]; + spin_lock_bh(&li->lock); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); + spin_unlock_bh(&li->lock); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); +} + +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int) +{ + struct kvm_s390_interrupt_info *inti; + inti = kzalloc(sizeof(*inti), GFP_KERNEL); if (!inti) return -ENOMEM; - switch (s390int->type) { + inti->type = s390int->type; + switch (inti->type) { case KVM_S390_INT_VIRTIO: VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx", s390int->parm, s390int->parm64); - inti->type = s390int->type; inti->ext.ext_params = s390int->parm; inti->ext.ext_params2 = s390int->parm64; break; case KVM_S390_INT_SERVICE: VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); - inti->type = s390int->type; inti->ext.ext_params = s390int->parm; break; - case KVM_S390_PROGRAM_INT: - case KVM_S390_SIGP_STOP: - case KVM_S390_INT_EXTERNAL_CALL: - case KVM_S390_INT_EMERGENCY: - kfree(inti); - return -EINVAL; case KVM_S390_MCHK: VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", s390int->parm64); - inti->type = s390int->type; inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ inti->mchk.mcic = s390int->parm64; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (s390int->type & IOINT_AI_MASK) + if (inti->type & IOINT_AI_MASK) VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); else VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", s390int->type & IOINT_CSSID_MASK, s390int->type & IOINT_SSID_MASK, s390int->type & IOINT_SCHID_MASK); - inti->type = s390int->type; inti->io.subchannel_id = s390int->parm >> 16; inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; inti->io.io_int_parm = s390int->parm64 >> 32; @@ -718,42 +751,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, 2); - mutex_lock(&kvm->lock); - fi = &kvm->arch.float_int; - spin_lock(&fi->lock); - if (!is_ioint(inti->type)) - list_add_tail(&inti->list, &fi->list); - else { - u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); - - /* Keep I/O interrupts sorted in isc order. */ - list_for_each_entry(iter, &fi->list, list) { - if (!is_ioint(iter->type)) - continue; - if (int_word_to_isc_bits(iter->io.io_int_word) - <= isc_bits) - continue; - break; - } - list_add_tail(&inti->list, &iter->list); - } - atomic_set(&fi->active, 1); - sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); - if (sigcpu == KVM_MAX_VCPUS) { - do { - sigcpu = fi->next_rr_cpu++; - if (sigcpu == KVM_MAX_VCPUS) - sigcpu = fi->next_rr_cpu = 0; - } while (fi->local_int[sigcpu] == NULL); - } - li = fi->local_int[sigcpu]; - spin_lock_bh(&li->lock); - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); - spin_unlock_bh(&li->lock); - spin_unlock(&fi->lock); - mutex_unlock(&kvm->lock); + __inject_vm(kvm, inti); return 0; } @@ -841,3 +839,207 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, mutex_unlock(&vcpu->kvm->lock); return 0; } + +static void clear_floating_interrupts(struct kvm *kvm) +{ + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *n, *inti = NULL; + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + list_del(&inti->list); + kfree(inti); + } + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); +} + +static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, + u8 *addr) +{ + struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; + struct kvm_s390_irq irq = {0}; + + irq.type = inti->type; + switch (inti->type) { + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + irq.u.ext = inti->ext; + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + irq.u.io = inti->io; + break; + case KVM_S390_MCHK: + irq.u.mchk = inti->mchk; + break; + default: + return -EINVAL; + } + + if (copy_to_user(uptr, &irq, sizeof(irq))) + return -EFAULT; + + return 0; +} + +static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) +{ + struct kvm_s390_interrupt_info *inti; + struct kvm_s390_float_interrupt *fi; + int ret = 0; + int n = 0; + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + + list_for_each_entry(inti, &fi->list, list) { + if (len < sizeof(struct kvm_s390_irq)) { + /* signal userspace to try again */ + ret = -ENOMEM; + break; + } + ret = copy_irq_to_user(inti, buf); + if (ret) + break; + buf += sizeof(struct kvm_s390_irq); + len -= sizeof(struct kvm_s390_irq); + n++; + } + + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); + + return ret < 0 ? ret : n; +} + +static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + int r; + + switch (attr->group) { + case KVM_DEV_FLIC_GET_ALL_IRQS: + r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr, + attr->attr); + break; + default: + r = -EINVAL; + } + + return r; +} + +static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti, + u64 addr) +{ + struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; + void *target = NULL; + void __user *source; + u64 size; + + if (get_user(inti->type, (u64 __user *)addr)) + return -EFAULT; + + switch (inti->type) { + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + target = (void *) &inti->ext; + source = &uptr->u.ext; + size = sizeof(inti->ext); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + target = (void *) &inti->io; + source = &uptr->u.io; + size = sizeof(inti->io); + break; + case KVM_S390_MCHK: + target = (void *) &inti->mchk; + source = &uptr->u.mchk; + size = sizeof(inti->mchk); + break; + default: + return -EINVAL; + } + + if (copy_from_user(target, source, size)) + return -EFAULT; + + return 0; +} + +static int enqueue_floating_irq(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct kvm_s390_interrupt_info *inti = NULL; + int r = 0; + int len = attr->attr; + + if (len % sizeof(struct kvm_s390_irq) != 0) + return -EINVAL; + else if (len > KVM_S390_FLIC_MAX_BUFFER) + return -EINVAL; + + while (len >= sizeof(struct kvm_s390_irq)) { + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + r = copy_irq_from_user(inti, attr->addr); + if (r) { + kfree(inti); + return r; + } + __inject_vm(dev->kvm, inti); + len -= sizeof(struct kvm_s390_irq); + attr->addr += sizeof(struct kvm_s390_irq); + } + + return r; +} + +static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + int r = 0; + + switch (attr->group) { + case KVM_DEV_FLIC_ENQUEUE: + r = enqueue_floating_irq(dev, attr); + break; + case KVM_DEV_FLIC_CLEAR_IRQS: + r = 0; + clear_floating_interrupts(dev->kvm); + break; + default: + r = -EINVAL; + } + + return r; +} + +static int flic_create(struct kvm_device *dev, u32 type) +{ + if (!dev) + return -EINVAL; + if (dev->kvm->arch.flic) + return -EINVAL; + dev->kvm->arch.flic = dev; + return 0; +} + +static void flic_destroy(struct kvm_device *dev) +{ + dev->kvm->arch.flic = NULL; + kfree(dev); +} + +/* s390 floating irq controller (flic) */ +struct kvm_device_ops kvm_flic_ops = { + .name = "kvm-flic", + .get_attr = flic_get_attr, + .set_attr = flic_set_attr, + .create = flic_create, + .destroy = flic_destroy, +}; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e0676f390d57..782420f3c4d5 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -157,6 +157,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ENABLE_CAP: case KVM_CAP_S390_CSS_SUPPORT: case KVM_CAP_IOEVENTFD: + case KVM_CAP_DEVICE_CTRL: r = 1; break; case KVM_CAP_NR_VCPUS: diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b8e9a43e501a..c0102ef2de48 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1064,6 +1064,7 @@ extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_vfio_ops; extern struct kvm_device_ops kvm_arm_vgic_v2_ops; +extern struct kvm_device_ops kvm_flic_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 86faf47ae494..19f717b15297 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -918,6 +918,7 @@ struct kvm_device_attr { #define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_DEL 2 #define KVM_DEV_TYPE_ARM_VGIC_V2 5 +#define KVM_DEV_TYPE_FLIC 6 /* * ioctls for VM fds diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 03a0381b1cb7..a9e999a48e43 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2283,6 +2283,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, case KVM_DEV_TYPE_ARM_VGIC_V2: ops = &kvm_arm_vgic_v2_ops; break; +#endif +#ifdef CONFIG_S390 + case KVM_DEV_TYPE_FLIC: + ops = &kvm_flic_ops; + break; #endif default: return -ENODEV; -- cgit v1.2.3-55-g7522 From a91b8ebe8671980151e0a19ee9fec6b0e1ae1d58 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Thu, 30 Jan 2014 08:40:23 +0100 Subject: KVM: s390: limit floating irqs Userspace can flood the kernel with interrupts as of now, so let's limit the number of pending floating interrupts injected via either the floating interrupt controller or the KVM_S390_INTERRUPT ioctl. We can have up to 4*64k pending subchannels + 8 adapter interrupts, as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. There are also sclp and machine checks. This gives us (4*65536+8+64*64+1+1) = 266250 interrupts. Suggested-by: Christian Borntraeger Signed-off-by: Jens Freimann Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/include/uapi/asm/kvm.h | 2 +- arch/s390/kvm/interrupt.c | 26 +++++++++++++++++++++----- 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 59635b5c59a6..c3c5e1028136 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -214,6 +214,7 @@ struct kvm_s390_float_interrupt { unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1) / sizeof(long)]; struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS]; + unsigned int irq_count; }; diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 38d5f98552bb..058b178391d4 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -27,7 +27,7 @@ * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000 * Lets round up to 8192 pages. */ - +#define KVM_S390_MAX_FLOAT_IRQS 266250 #define KVM_S390_FLIC_MAX_BUFFER 0x2000000 /* for KVM_GET_REGS and KVM_SET_REGS */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a5f18babed4c..9c9192b5e339 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -528,6 +528,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) list_for_each_entry_safe(inti, n, &fi->list, list) { if (__interrupt_is_deliverable(vcpu, inti)) { list_del(&inti->list); + fi->irq_count--; deliver = 1; break; } @@ -583,6 +584,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) if ((inti->type == KVM_S390_MCHK) && __interrupt_is_deliverable(vcpu, inti)) { list_del(&inti->list); + fi->irq_count--; deliver = 1; break; } @@ -650,8 +652,10 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, inti = iter; break; } - if (inti) + if (inti) { list_del_init(&inti->list); + fi->irq_count--; + } if (list_empty(&fi->list)) atomic_set(&fi->active, 0); spin_unlock(&fi->lock); @@ -659,16 +663,22 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, return inti; } -static void __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) +static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { struct kvm_s390_local_interrupt *li; struct kvm_s390_float_interrupt *fi; struct kvm_s390_interrupt_info *iter; int sigcpu; + int rc = 0; mutex_lock(&kvm->lock); fi = &kvm->arch.float_int; spin_lock(&fi->lock); + if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) { + rc = -EINVAL; + goto unlock_fi; + } + fi->irq_count++; if (!is_ioint(inti->type)) { list_add_tail(&inti->list, &fi->list); } else { @@ -700,8 +710,10 @@ static void __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) if (waitqueue_active(li->wq)) wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); +unlock_fi: spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); + return rc; } int kvm_s390_inject_vm(struct kvm *kvm, @@ -751,8 +763,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, 2); - __inject_vm(kvm, inti); - return 0; + return __inject_vm(kvm, inti); } int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, @@ -852,6 +863,7 @@ static void clear_floating_interrupts(struct kvm *kvm) list_del(&inti->list); kfree(inti); } + fi->irq_count = 0; atomic_set(&fi->active, 0); spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); @@ -992,7 +1004,11 @@ static int enqueue_floating_irq(struct kvm_device *dev, kfree(inti); return r; } - __inject_vm(dev->kvm, inti); + r = __inject_vm(dev->kvm, inti); + if (r) { + kfree(inti); + return r; + } len -= sizeof(struct kvm_s390_irq); attr->addr += sizeof(struct kvm_s390_irq); } -- cgit v1.2.3-55-g7522 From 24eb3a824c4f3ccfaa2305dc1d9d9e2a708828c5 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Mon, 17 Jun 2013 16:25:18 +0200 Subject: KVM: s390: Add FAULT_FLAG_RETRY_NOWAIT for guest fault In the case of a fault, we will retry to exit sie64 but with gmap fault indication for this thread set. This makes it possible to handle async page faults. Based on a patch from Martin Schwidefsky. Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/pgtable.h | 2 ++ arch/s390/include/asm/processor.h | 1 + arch/s390/kvm/kvm-s390.c | 23 +++++++++++++++++++++-- arch/s390/mm/fault.c | 26 ++++++++++++++++++++++---- 4 files changed, 46 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2204400d0bd5..66101f6c6d81 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -767,6 +767,7 @@ static inline void pgste_set_pte(pte_t *ptep, pte_t entry) * @table: pointer to the page directory * @asce: address space control element for gmap page table * @crst_list: list of all crst tables used in the guest address space + * @pfault_enabled: defines if pfaults are applicable for the guest */ struct gmap { struct list_head list; @@ -775,6 +776,7 @@ struct gmap { unsigned long asce; void *private; struct list_head crst_list; + bool pfault_enabled; }; /** diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 0a876bc543d3..dc5fc4f90e52 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -79,6 +79,7 @@ struct thread_struct { unsigned long ksp; /* kernel stack pointer */ mm_segment_t mm_segment; unsigned long gmap_addr; /* address of last gmap fault. */ + unsigned int gmap_pfault; /* signal of a pending guest pfault */ struct per_regs per_user; /* User specified PER registers */ struct per_event per_event; /* Cause of the last PER trap */ unsigned long per_flags; /* Flags to control debug behavior */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 782420f3c4d5..9eec794caa7f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -255,6 +255,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.gmap) goto out_nogmap; kvm->arch.gmap->private = kvm; + kvm->arch.gmap->pfault_enabled = 0; } kvm->arch.css_support = 0; @@ -701,6 +702,17 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) return 0; } +static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu) +{ + long rc; + hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap); + struct mm_struct *mm = current->mm; + down_read(&mm->mmap_sem); + rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL); + up_read(&mm->mmap_sem); + return rc; +} + static int vcpu_pre_run(struct kvm_vcpu *vcpu) { int rc, cpuflags; @@ -730,7 +742,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { - int rc; + int rc = -1; VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); @@ -744,7 +756,14 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) current->thread.gmap_addr; vcpu->run->s390_ucontrol.pgm_code = 0x10; rc = -EREMOTE; - } else { + + } else if (current->thread.gmap_pfault) { + current->thread.gmap_pfault = 0; + if (kvm_arch_fault_in_sync(vcpu) >= 0) + rc = 0; + } + + if (rc == -1) { VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); trace_kvm_s390_sie_fault(vcpu); rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index d95265b2719f..88cef505453b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -50,6 +50,7 @@ #define VM_FAULT_BADMAP 0x020000 #define VM_FAULT_BADACCESS 0x040000 #define VM_FAULT_SIGNAL 0x080000 +#define VM_FAULT_PFAULT 0x100000 static unsigned long store_indication __read_mostly; @@ -227,6 +228,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault) return; } case VM_FAULT_BADCONTEXT: + case VM_FAULT_PFAULT: do_no_context(regs); break; case VM_FAULT_SIGNAL: @@ -264,6 +266,9 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault) */ static inline int do_exception(struct pt_regs *regs, int access) { +#ifdef CONFIG_PGSTE + struct gmap *gmap; +#endif struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; @@ -304,9 +309,10 @@ static inline int do_exception(struct pt_regs *regs, int access) down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE - if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { - address = __gmap_fault(address, - (struct gmap *) S390_lowcore.gmap); + gmap = (struct gmap *) + ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0); + if (gmap) { + address = __gmap_fault(address, gmap); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; @@ -315,6 +321,8 @@ static inline int do_exception(struct pt_regs *regs, int access) fault = VM_FAULT_OOM; goto out_up; } + if (gmap->pfault_enabled) + flags |= FAULT_FLAG_RETRY_NOWAIT; } #endif @@ -371,9 +379,19 @@ retry: regs, address); } if (fault & VM_FAULT_RETRY) { +#ifdef CONFIG_PGSTE + if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { + /* FAULT_FLAG_RETRY_NOWAIT has been set, + * mmap_sem has not been released */ + current->thread.gmap_pfault = 1; + fault = VM_FAULT_PFAULT; + goto out_up; + } +#endif /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags &= ~(FAULT_FLAG_ALLOW_RETRY | + FAULT_FLAG_RETRY_NOWAIT); flags |= FAULT_FLAG_TRIED; down_read(&mm->mmap_sem); goto retry; -- cgit v1.2.3-55-g7522 From 3c038e6be0e299d4d3762d0a9a29f02de6e04991 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Mon, 7 Oct 2013 17:11:48 +0200 Subject: KVM: async_pf: Async page fault support on s390 This patch enables async page faults for s390 kvm guests. It provides the userspace API to enable and disable_wait this feature. The disable_wait will enforce that the feature is off by waiting on it. Also it includes the diagnose code, called by the guest to enable async page faults. The async page faults will use an already existing guest interface for this purpose, as described in "CP Programming Services (SC24-6084)". Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/devices/s390_flic.txt | 10 +++ arch/s390/include/asm/kvm_host.h | 22 ++++++ arch/s390/include/uapi/asm/kvm.h | 2 + arch/s390/kvm/Kconfig | 2 + arch/s390/kvm/Makefile | 2 +- arch/s390/kvm/diag.c | 84 +++++++++++++++++++++++ arch/s390/kvm/interrupt.c | 65 ++++++++++++++++-- arch/s390/kvm/kvm-s390.c | 89 ++++++++++++++++++++++++- arch/s390/kvm/kvm-s390.h | 4 ++ arch/s390/kvm/sigp.c | 7 ++ arch/s390/kvm/trace.h | 46 +++++++++++++ include/uapi/linux/kvm.h | 2 + 12 files changed, 326 insertions(+), 9 deletions(-) (limited to 'arch/s390') diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt index 6b557953066a..410fa673e5b6 100644 --- a/Documentation/virtual/kvm/devices/s390_flic.txt +++ b/Documentation/virtual/kvm/devices/s390_flic.txt @@ -11,6 +11,7 @@ FLIC provides support to - add interrupts (KVM_DEV_FLIC_ENQUEUE) - inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS) - purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS) +- enable/disable for the guest transparent async page faults Groups: KVM_DEV_FLIC_ENQUEUE @@ -34,3 +35,12 @@ Groups: KVM_DEV_FLIC_CLEAR_IRQS Simply deletes all elements from the list of currently pending floating interrupts. No interrupts are injected into the guest. + + KVM_DEV_FLIC_APF_ENABLE + Enables async page faults for the guest. So in case of a major page fault + the host is allowed to handle this async and continues the guest. + + KVM_DEV_FLIC_APF_DISABLE_WAIT + Disables async page faults for the guest and waits until already pending + async page faults are done. This is necessary to trigger a completion interrupt + for every init interrupt before migrating the interrupt list. diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index c3c5e1028136..2c69ba285e81 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -231,6 +231,10 @@ struct kvm_vcpu_arch { u64 stidp_data; }; struct gmap *gmap; +#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL) + unsigned long pfault_token; + unsigned long pfault_select; + unsigned long pfault_compare; }; struct kvm_vm_stat { @@ -257,6 +261,24 @@ static inline bool kvm_is_error_hva(unsigned long addr) return IS_ERR_VALUE(addr); } +#define ASYNC_PF_PER_VCPU 64 +struct kvm_vcpu; +struct kvm_async_pf; +struct kvm_arch_async_pf { + unsigned long pfault_token; +}; + +bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); + +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); + +void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); + +void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); + extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; #endif diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 058b178391d4..ccfd0b1d056d 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -20,6 +20,8 @@ #define KVM_DEV_FLIC_GET_ALL_IRQS 1 #define KVM_DEV_FLIC_ENQUEUE 2 #define KVM_DEV_FLIC_CLEAR_IRQS 3 +#define KVM_DEV_FLIC_APF_ENABLE 4 +#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 70b46eacf8e1..c8bacbcd2e5b 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -23,6 +23,8 @@ config KVM select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_EVENTFD + select KVM_ASYNC_PF + select KVM_ASYNC_PF_SYNC ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 40b4c6470f88..a47d2c355f68 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -7,7 +7,7 @@ # as published by the Free Software Foundation. KVM := ../../../virt/kvm -common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 8216c0e0b2e2..bf9ed34c2bcd 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -17,6 +17,7 @@ #include "kvm-s390.h" #include "trace.h" #include "trace-s390.h" +#include "gaccess.h" static int diag_release_pages(struct kvm_vcpu *vcpu) { @@ -46,6 +47,87 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) return 0; } +static int __diag_page_ref_service(struct kvm_vcpu *vcpu) +{ + struct prs_parm { + u16 code; + u16 subcode; + u16 parm_len; + u16 parm_version; + u64 token_addr; + u64 select_mask; + u64 compare_mask; + u64 zarch; + }; + struct prs_parm parm; + int rc; + u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4; + u16 ry = (vcpu->arch.sie_block->ipa & 0x0f); + unsigned long hva_token = KVM_HVA_ERR_BAD; + + if (vcpu->run->s.regs.gprs[rx] & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (parm.subcode) { + case 0: /* TOKEN */ + if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) { + /* + * If the pagefault handshake is already activated, + * the token must not be changed. We have to return + * decimal 8 instead, as mandated in SC24-6084. + */ + vcpu->run->s.regs.gprs[ry] = 8; + return 0; + } + + if ((parm.compare_mask & parm.select_mask) != parm.compare_mask || + parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr)); + if (kvm_is_error_hva(hva_token)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + vcpu->arch.pfault_token = parm.token_addr; + vcpu->arch.pfault_select = parm.select_mask; + vcpu->arch.pfault_compare = parm.compare_mask; + vcpu->run->s.regs.gprs[ry] = 0; + rc = 0; + break; + case 1: /* + * CANCEL + * Specification allows to let already pending tokens survive + * the cancel, therefore to reduce code complexity, we assume + * all outstanding tokens are already pending. + */ + if (parm.token_addr || parm.select_mask || + parm.compare_mask || parm.zarch) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + vcpu->run->s.regs.gprs[ry] = 0; + /* + * If the pfault handling was not established or is already + * canceled SC24-6084 requests to return decimal 4. + */ + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) + vcpu->run->s.regs.gprs[ry] = 4; + else + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + + rc = 0; + break; + default: + rc = -EOPNOTSUPP; + break; + } + + return rc; +} + static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); @@ -150,6 +232,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) return __diag_time_slice_end(vcpu); case 0x9c: return __diag_time_slice_end_directed(vcpu); + case 0x258: + return __diag_page_ref_service(vcpu); case 0x308: return __diag_ipl_functions(vcpu); case 0x500: diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9c9192b5e339..1848080c3f34 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -31,7 +31,7 @@ static int is_ioint(u64 type) return ((type & 0xfffe0000u) != 0xfffe0000u); } -static int psw_extint_disabled(struct kvm_vcpu *vcpu) +int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); } @@ -78,11 +78,8 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, return 1; return 0; case KVM_S390_INT_SERVICE: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x200ul) - return 1; - return 0; + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: if (psw_extint_disabled(vcpu)) return 0; @@ -150,6 +147,8 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: case KVM_S390_INT_SERVICE: + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: if (psw_extint_disabled(vcpu)) __set_cpuflag(vcpu, CPUSTAT_EXT_INT); @@ -223,6 +222,30 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, rc |= put_guest(vcpu, inti->ext.ext_params, (u32 __user *)__LC_EXT_PARAMS); break; + case KVM_S390_INT_PFAULT_INIT: + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, + inti->ext.ext_params2); + rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE); + rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + rc |= put_guest(vcpu, inti->ext.ext_params2, + (u64 __user *) __LC_EXT_PARAMS2); + break; + case KVM_S390_INT_PFAULT_DONE: + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, + inti->ext.ext_params2); + rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE); + rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + rc |= put_guest(vcpu, inti->ext.ext_params2, + (u64 __user *) __LC_EXT_PARAMS2); + break; case KVM_S390_INT_VIRTIO: VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", inti->ext.ext_params, inti->ext.ext_params2); @@ -357,7 +380,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) return 1; } -static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; @@ -737,6 +760,10 @@ int kvm_s390_inject_vm(struct kvm *kvm, VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); inti->ext.ext_params = s390int->parm; break; + case KVM_S390_INT_PFAULT_DONE: + inti->type = s390int->type; + inti->ext.ext_params2 = s390int->parm64; + break; case KVM_S390_MCHK: VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", s390int->parm64); @@ -823,6 +850,10 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, inti->type = s390int->type; inti->mchk.mcic = s390int->parm64; break; + case KVM_S390_INT_PFAULT_INIT: + inti->type = s390int->type; + inti->ext.ext_params2 = s390int->parm64; + break; case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: @@ -877,6 +908,8 @@ static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, irq.type = inti->type; switch (inti->type) { + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: irq.u.ext = inti->ext; @@ -956,6 +989,8 @@ static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti, return -EFAULT; switch (inti->type) { + case KVM_S390_INT_PFAULT_INIT: + case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: target = (void *) &inti->ext; @@ -1019,6 +1054,8 @@ static int enqueue_floating_irq(struct kvm_device *dev, static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r = 0; + unsigned int i; + struct kvm_vcpu *vcpu; switch (attr->group) { case KVM_DEV_FLIC_ENQUEUE: @@ -1028,6 +1065,20 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) r = 0; clear_floating_interrupts(dev->kvm); break; + case KVM_DEV_FLIC_APF_ENABLE: + dev->kvm->arch.gmap->pfault_enabled = 1; + break; + case KVM_DEV_FLIC_APF_DISABLE_WAIT: + dev->kvm->arch.gmap->pfault_enabled = 0; + /* + * Make sure no async faults are in transition when + * clearing the queues. So we don't need to worry + * about late coming workers. + */ + synchronize_srcu(&dev->kvm->srcu); + kvm_for_each_vcpu(i, vcpu, dev->kvm) + kvm_clear_async_pf_completion_queue(vcpu); + break; default: r = -EINVAL; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9eec794caa7f..d8e9f04977db 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -152,6 +152,7 @@ int kvm_dev_ioctl_check_extension(long ext) #ifdef CONFIG_KVM_S390_UCONTROL case KVM_CAP_S390_UCONTROL: #endif + case KVM_CAP_ASYNC_PF: case KVM_CAP_SYNC_REGS: case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: @@ -273,6 +274,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 3, "%s", "free cpu"); trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); + kvm_clear_async_pf_completion_queue(vcpu); if (!kvm_is_ucontrol(vcpu->kvm)) { clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn); @@ -322,6 +324,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) /* Section: vcpu related */ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(vcpu); if (kvm_is_ucontrol(vcpu->kvm)) { vcpu->arch.gmap = gmap_alloc(current->mm); if (!vcpu->arch.gmap) @@ -382,6 +386,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.guest_fpregs.fpc = 0; asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); vcpu->arch.sie_block->gbea = 1; + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(vcpu); atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); } @@ -713,10 +719,89 @@ static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu) return rc; } +static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, + unsigned long token) +{ + struct kvm_s390_interrupt inti; + inti.parm64 = token; + + if (start_token) { + inti.type = KVM_S390_INT_PFAULT_INIT; + WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti)); + } else { + inti.type = KVM_S390_INT_PFAULT_DONE; + WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti)); + } +} + +void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ + trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); + __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); +} + +void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ + trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token); + __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token); +} + +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work) +{ + /* s390 will always inject the page directly */ +} + +bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) +{ + /* + * s390 will always inject the page directly, + * but we still want check_async_completion to cleanup + */ + return true; +} + +static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) +{ + hva_t hva; + struct kvm_arch_async_pf arch; + int rc; + + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) + return 0; + if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) != + vcpu->arch.pfault_compare) + return 0; + if (psw_extint_disabled(vcpu)) + return 0; + if (kvm_cpu_has_interrupt(vcpu)) + return 0; + if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) + return 0; + if (!vcpu->arch.gmap->pfault_enabled) + return 0; + + hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap); + if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8)) + return 0; + + rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch); + return rc; +} + static int vcpu_pre_run(struct kvm_vcpu *vcpu) { int rc, cpuflags; + /* + * On s390 notifications for arriving pages will be delivered directly + * to the guest but the house keeping for completed pfaults is + * handled outside the worker. + */ + kvm_check_async_pf_completion(vcpu); + memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); if (need_resched()) @@ -758,8 +843,10 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) rc = -EREMOTE; } else if (current->thread.gmap_pfault) { + trace_kvm_s390_major_guest_pfault(vcpu); current->thread.gmap_pfault = 0; - if (kvm_arch_fault_in_sync(vcpu) >= 0) + if (kvm_arch_setup_async_pf(vcpu) || + (kvm_arch_fault_in_sync(vcpu) >= 0)) rc = 0; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index f9559b0bd620..ed4750a5bc3c 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -159,4 +159,8 @@ void exit_sie_sync(struct kvm_vcpu *vcpu); /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); +/* implemented in interrupt.c */ +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int psw_extint_disabled(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 87c2b3a3bd3e..fe9442d39f0e 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -224,6 +224,8 @@ unlock: static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) { int rc; + unsigned int i; + struct kvm_vcpu *v; switch (parameter & 0xff) { case 0: @@ -231,6 +233,11 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) break; case 1: case 2: + kvm_for_each_vcpu(i, v, vcpu->kvm) { + v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + kvm_clear_async_pf_completion_queue(v); + } + rc = SIGP_CC_ORDER_CODE_ACCEPTED; break; default: diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 3db76b2daed7..e8e7213d4cc5 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -30,6 +30,52 @@ TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \ __entry->pswmask, __entry->pswaddr, p_args) +TRACE_EVENT(kvm_s390_major_guest_pfault, + TP_PROTO(VCPU_PROTO_COMMON), + TP_ARGS(VCPU_ARGS_COMMON), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + ), + VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault") + ); + +TRACE_EVENT(kvm_s390_pfault_init, + TP_PROTO(VCPU_PROTO_COMMON, long pfault_token), + TP_ARGS(VCPU_ARGS_COMMON, pfault_token), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(long, pfault_token) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->pfault_token = pfault_token; + ), + VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token) + ); + +TRACE_EVENT(kvm_s390_pfault_done, + TP_PROTO(VCPU_PROTO_COMMON, long pfault_token), + TP_ARGS(VCPU_ARGS_COMMON, pfault_token), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(long, pfault_token) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->pfault_token = pfault_token; + ), + VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token) + ); + /* * Tracepoints for SIE entry and exit. */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 19f717b15297..7d76401d2bb5 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -413,6 +413,8 @@ struct kvm_s390_psw { #define KVM_S390_PROGRAM_INT 0xfffe0001u #define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u #define KVM_S390_RESTART 0xfffe0003u +#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u +#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u #define KVM_S390_MCHK 0xfffe1000u #define KVM_S390_INT_VIRTIO 0xffff2603u #define KVM_S390_INT_SERVICE 0xffff2401u -- cgit v1.2.3-55-g7522 From 536336c21697551ceca44bdffb9f53e6cc5f2f20 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Mon, 30 Sep 2013 10:55:33 +0200 Subject: KVM: async_pf: Exploit one reg interface for pfault To enable pfault after live migration we need to expose pfault_token, pfault_select and pfault_compare, as one reg registers to userspace. So that qemu is able to transfer this between the source and the target. Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger --- arch/s390/include/uapi/asm/kvm.h | 3 +++ arch/s390/kvm/kvm-s390.c | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index ccfd0b1d056d..cb4c1eb8a0a5 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -73,4 +73,7 @@ struct kvm_sync_regs { #define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2) #define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3) #define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4) +#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5) +#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6) +#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7) #endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d8e9f04977db..a5da2cc798c8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -561,6 +561,18 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, r = put_user(vcpu->arch.sie_block->ckc, (u64 __user *)reg->addr); break; + case KVM_REG_S390_PFTOKEN: + r = put_user(vcpu->arch.pfault_token, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFCOMPARE: + r = put_user(vcpu->arch.pfault_compare, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFSELECT: + r = put_user(vcpu->arch.pfault_select, + (u64 __user *)reg->addr); + break; default: break; } @@ -590,6 +602,18 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, r = get_user(vcpu->arch.sie_block->ckc, (u64 __user *)reg->addr); break; + case KVM_REG_S390_PFTOKEN: + r = get_user(vcpu->arch.pfault_token, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFCOMPARE: + r = get_user(vcpu->arch.pfault_compare, + (u64 __user *)reg->addr); + break; + case KVM_REG_S390_PFSELECT: + r = get_user(vcpu->arch.pfault_select, + (u64 __user *)reg->addr); + break; default: break; } -- cgit v1.2.3-55-g7522 From f87618e870d03ac114dd5496b23f6f628af54152 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Wed, 26 Feb 2014 16:14:17 +0100 Subject: KVM: s390: implementation of kvm_arch_vcpu_runnable() A vcpu is defined to be runnable if an interrupt is pending. Signed-off-by: Michael Mueller Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- arch/s390/kvm/kvm-s390.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index a5da2cc798c8..18959bb75c51 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -483,9 +483,7 @@ out: int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - /* kvm common code refers to this, but never calls it */ - BUG(); - return 0; + return kvm_cpu_has_interrupt(vcpu); } void s390_vcpu_block(struct kvm_vcpu *vcpu) -- cgit v1.2.3-55-g7522 From 9cac38dd5dc41c943d711b96f9755a29c8b854ea Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Wed, 26 Feb 2014 16:14:19 +0100 Subject: KVM/s390: Set preempted flag during vcpu wakeup and interrupt delivery Commit "kvm: Record the preemption status of vcpus using preempt notifiers" caused a performance regression on s390. It turned out that in the case that if a former sleeping cpu, that was woken up, this cpu is not a yield candidate since it gave up the cpu voluntarily. To retain this candiate its preempted flag is set during wakeup and interrupt delivery time. Significant performance measurement work and code analysis to solve this issue was provided by Mao Chuan Li and his team in Beijing. Signed-off-by: Michael Mueller Reviewed-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- arch/s390/kvm/interrupt.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1848080c3f34..fff070bd0159 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -505,6 +505,7 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); + vcpu->preempted = true; tasklet_schedule(&vcpu->arch.tasklet); return HRTIMER_NORESTART; @@ -732,6 +733,7 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); if (waitqueue_active(li->wq)) wake_up_interruptible(li->wq); + kvm_get_vcpu(kvm, sigcpu)->preempted = true; spin_unlock_bh(&li->lock); unlock_fi: spin_unlock(&fi->lock); @@ -877,6 +879,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); + vcpu->preempted = true; spin_unlock_bh(&li->lock); mutex_unlock(&vcpu->kvm->lock); return 0; -- cgit v1.2.3-55-g7522 From 672550fb682e9935e1a318bf4ac3f611a057dee1 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 10 Feb 2014 15:32:19 +0100 Subject: KVM: s390: Provide access to program parameter commit d208c79d63e06457eef077af770d23dc4cde4d43 (KVM: s390: Enable the LPP facility for guests) enabled the LPP instruction for guests. We should expose the program parameter as a pseudo register for migration/reset etc. Lets also reset this value on initial CPU reset. Signed-off-by: Christian Borntraeger Reviewed-by: Thomas Huth Reviewed-by: Jason J. Herne --- arch/s390/include/asm/kvm_host.h | 4 +++- arch/s390/include/uapi/asm/kvm.h | 1 + arch/s390/kvm/kvm-s390.c | 9 +++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 2c69ba285e81..062b78cde307 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -107,7 +107,9 @@ struct kvm_s390_sie_block { __u64 gbea; /* 0x0180 */ __u8 reserved188[24]; /* 0x0188 */ __u32 fac; /* 0x01a0 */ - __u8 reserved1a4[68]; /* 0x01a4 */ + __u8 reserved1a4[58]; /* 0x01a4 */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ __u64 itdba; /* 0x01e8 */ __u8 reserved1f0[16]; /* 0x01f0 */ } __attribute__((packed)); diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index cb4c1eb8a0a5..766324476f12 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -76,4 +76,5 @@ struct kvm_sync_regs { #define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5) #define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6) #define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7) +#define KVM_REG_S390_PP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8) #endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 18959bb75c51..0262936e18be 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -386,6 +386,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.guest_fpregs.fpc = 0; asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); vcpu->arch.sie_block->gbea = 1; + vcpu->arch.sie_block->pp = 0; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); @@ -571,6 +572,10 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, r = put_user(vcpu->arch.pfault_select, (u64 __user *)reg->addr); break; + case KVM_REG_S390_PP: + r = put_user(vcpu->arch.sie_block->pp, + (u64 __user *)reg->addr); + break; default: break; } @@ -612,6 +617,10 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, r = get_user(vcpu->arch.pfault_select, (u64 __user *)reg->addr); break; + case KVM_REG_S390_PP: + r = get_user(vcpu->arch.sie_block->pp, + (u64 __user *)reg->addr); + break; default: break; } -- cgit v1.2.3-55-g7522 From afa45ff521130cee79a50b565693388be8c8c9c2 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 10 Feb 2014 15:39:23 +0100 Subject: KVM: s390: expose gbea register to userspace For migration/reset we want to expose the guest breaking event address register to userspace. Lets use ONE_REG for that purpose. Signed-off-by: Christian Borntraeger Reviewed-by: Jason J. Herne --- arch/s390/include/uapi/asm/kvm.h | 1 + arch/s390/kvm/kvm-s390.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 766324476f12..2f0ade24f96a 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -77,4 +77,5 @@ struct kvm_sync_regs { #define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6) #define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7) #define KVM_REG_S390_PP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8) +#define KVM_REG_S390_GBEA (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9) #endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0262936e18be..a3e4c07ec3a5 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -576,6 +576,10 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, r = put_user(vcpu->arch.sie_block->pp, (u64 __user *)reg->addr); break; + case KVM_REG_S390_GBEA: + r = put_user(vcpu->arch.sie_block->gbea, + (u64 __user *)reg->addr); + break; default: break; } @@ -621,6 +625,10 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, r = get_user(vcpu->arch.sie_block->pp, (u64 __user *)reg->addr); break; + case KVM_REG_S390_GBEA: + r = get_user(vcpu->arch.sie_block->gbea, + (u64 __user *)reg->addr); + break; default: break; } -- cgit v1.2.3-55-g7522 From ff520a6327e83ef55515d1be3d0a1b10c084f59c Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Mon, 24 Feb 2014 10:11:41 +0100 Subject: KVM: s390: Simplify online vcpus counting for stsi We don't need to loop over all cpus to get the number of vcpus. Let's use the available counter online_vcpus instead. Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 75beea632a10..ae9e8ee21557 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -396,15 +396,10 @@ static int handle_stidp(struct kvm_vcpu *vcpu) static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; int cpus = 0; int n; - spin_lock(&fi->lock); - for (n = 0; n < KVM_MAX_VCPUS; n++) - if (fi->local_int[n]) - cpus++; - spin_unlock(&fi->lock); + cpus = atomic_read(&vcpu->kvm->online_vcpus); /* deal with other level 3 hypervisors */ if (stsi(mem, 3, 2, 2)) -- cgit v1.2.3-55-g7522 From 13b191ae4afc0c29a5cd768f521ede5c72a608cb Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 27 Jan 2014 12:06:19 +0100 Subject: KVM: s390: Fixed CC of SIGP SET_PREFIX handler When SIGP SET_PREFIX is called with an illegal CPU id, it must return the condition code 3 ("not operational") instead of 1. Also fixed the order in which the checks are done - CC3 has a higher priority than CC1. And while we're at it, this patch also get rid of the floating interrupt lock here by using kvm_get_vcpu() to get the local_int struct of the destination CPU. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/sigp.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index fe9442d39f0e..466eefa18708 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -249,12 +249,18 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, u64 *reg) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; - struct kvm_s390_local_interrupt *li = NULL; + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; struct kvm_s390_interrupt_info *inti; int rc; u8 tmp; + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + /* make sure that the new value is valid memory */ address = address & 0x7fffe000u; if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || @@ -268,18 +274,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, if (!inti) return SIGP_CC_BUSY; - spin_lock(&fi->lock); - if (cpu_addr < KVM_MAX_VCPUS) - li = fi->local_int[cpu_addr]; - - if (li == NULL) { - *reg &= 0xffffffff00000000UL; - *reg |= SIGP_STATUS_INCORRECT_STATE; - rc = SIGP_CC_STATUS_STORED; - kfree(inti); - goto out_fi; - } - spin_lock_bh(&li->lock); /* cpu must be in stopped state */ if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { @@ -302,8 +296,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); out_li: spin_unlock_bh(&li->lock); -out_fi: - spin_unlock(&fi->lock); return rc; } -- cgit v1.2.3-55-g7522 From 1ee0bc559dc34fe36a29494faf7b7c91533bd31c Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Tue, 25 Feb 2014 15:36:45 +0100 Subject: KVM: s390: get rid of local_int array We can use kvm_get_vcpu() now and don't need the local_int array in the floating_int struct anymore. This also means we don't have to hold the float_int.lock in some places. Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 - arch/s390/kvm/interrupt.c | 6 +- arch/s390/kvm/kvm-s390.c | 5 +- arch/s390/kvm/sigp.c | 124 ++++++++++++++++----------------------- 4 files changed, 56 insertions(+), 80 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 062b78cde307..734d302ba389 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -215,7 +215,6 @@ struct kvm_s390_float_interrupt { int next_rr_cpu; unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1) / sizeof(long)]; - struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS]; unsigned int irq_count; }; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index fff070bd0159..1d0f9d532c0b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -692,6 +692,7 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) struct kvm_s390_local_interrupt *li; struct kvm_s390_float_interrupt *fi; struct kvm_s390_interrupt_info *iter; + struct kvm_vcpu *dst_vcpu = NULL; int sigcpu; int rc = 0; @@ -726,9 +727,10 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) sigcpu = fi->next_rr_cpu++; if (sigcpu == KVM_MAX_VCPUS) sigcpu = fi->next_rr_cpu = 0; - } while (fi->local_int[sigcpu] == NULL); + } while (kvm_get_vcpu(kvm, sigcpu) == NULL); } - li = fi->local_int[sigcpu]; + dst_vcpu = kvm_get_vcpu(kvm, sigcpu); + li = &dst_vcpu->arch.local_int; spin_lock_bh(&li->lock); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); if (waitqueue_active(li->wq)) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index a3e4c07ec3a5..9136f8d40850 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -460,11 +460,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, spin_lock_init(&vcpu->arch.local_int.lock); INIT_LIST_HEAD(&vcpu->arch.local_int.list); vcpu->arch.local_int.float_int = &kvm->arch.float_int; - spin_lock(&kvm->arch.float_int.lock); - kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; - spin_unlock(&kvm->arch.float_int.lock); rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) @@ -952,7 +949,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); - BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); + BUG_ON(kvm_get_vcpu(vcpu->kvm, vcpu->vcpu_id) == NULL); switch (kvm_run->exit_reason) { case KVM_EXIT_S390_SIEIC: diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 466eefa18708..3fe44c441609 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -23,29 +23,30 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; + int cpuflags; int rc; if (cpu_addr >= KVM_MAX_VCPUS) return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&fi->lock); - if (fi->local_int[cpu_addr] == NULL) - rc = SIGP_CC_NOT_OPERATIONAL; - else if (!(atomic_read(fi->local_int[cpu_addr]->cpuflags) - & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED))) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + + cpuflags = atomic_read(li->cpuflags); + if (!(cpuflags & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED))) rc = SIGP_CC_ORDER_CODE_ACCEPTED; else { *reg &= 0xffffffff00000000UL; - if (atomic_read(fi->local_int[cpu_addr]->cpuflags) - & CPUSTAT_ECALL_PEND) + if (cpuflags & CPUSTAT_ECALL_PEND) *reg |= SIGP_STATUS_EXT_CALL_PENDING; - if (atomic_read(fi->local_int[cpu_addr]->cpuflags) - & CPUSTAT_STOPPED) + if (cpuflags & CPUSTAT_STOPPED) *reg |= SIGP_STATUS_STOPPED; rc = SIGP_CC_STATUS_STORED; } - spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); return rc; @@ -53,10 +54,9 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; struct kvm_s390_interrupt_info *inti; - int rc; + struct kvm_vcpu *dst_vcpu = NULL; if (cpu_addr >= KVM_MAX_VCPUS) return SIGP_CC_NOT_OPERATIONAL; @@ -68,13 +68,10 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) inti->type = KVM_S390_INT_EMERGENCY; inti->emerg.code = vcpu->vcpu_id; - spin_lock(&fi->lock); - li = fi->local_int[cpu_addr]; - if (li == NULL) { - rc = SIGP_CC_NOT_OPERATIONAL; - kfree(inti); - goto unlock; - } + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; spin_lock_bh(&li->lock); list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); @@ -82,11 +79,9 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) if (waitqueue_active(li->wq)) wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); - rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); -unlock: - spin_unlock(&fi->lock); - return rc; + + return SIGP_CC_ORDER_CODE_ACCEPTED; } static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, @@ -122,10 +117,9 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; struct kvm_s390_interrupt_info *inti; - int rc; + struct kvm_vcpu *dst_vcpu = NULL; if (cpu_addr >= KVM_MAX_VCPUS) return SIGP_CC_NOT_OPERATIONAL; @@ -137,13 +131,10 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) inti->type = KVM_S390_INT_EXTERNAL_CALL; inti->extcall.code = vcpu->vcpu_id; - spin_lock(&fi->lock); - li = fi->local_int[cpu_addr]; - if (li == NULL) { - rc = SIGP_CC_NOT_OPERATIONAL; - kfree(inti); - goto unlock; - } + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; spin_lock_bh(&li->lock); list_add_tail(&inti->list, &li->list); atomic_set(&li->active, 1); @@ -151,11 +142,9 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) if (waitqueue_active(li->wq)) wake_up_interruptible(li->wq); spin_unlock_bh(&li->lock); - rc = SIGP_CC_ORDER_CODE_ACCEPTED; VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); -unlock: - spin_unlock(&fi->lock); - return rc; + + return SIGP_CC_ORDER_CODE_ACCEPTED; } static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) @@ -189,31 +178,26 @@ out: static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; int rc; if (cpu_addr >= KVM_MAX_VCPUS) return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&fi->lock); - li = fi->local_int[cpu_addr]; - if (li == NULL) { - rc = SIGP_CC_NOT_OPERATIONAL; - goto unlock; - } + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; rc = __inject_sigp_stop(li, action); -unlock: - spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) { /* If the CPU has already been stopped, we still have * to save the status when doing stop-and-store. This * has to be done after unlocking all spinlocks. */ - struct kvm_vcpu *dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); rc = kvm_s390_store_status_unloaded(dst_vcpu, KVM_S390_STORE_STATUS_NOADDR); } @@ -333,28 +317,26 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) { + struct kvm_s390_local_interrupt *li; + struct kvm_vcpu *dst_vcpu = NULL; int rc; - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; if (cpu_addr >= KVM_MAX_VCPUS) return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&fi->lock); - if (fi->local_int[cpu_addr] == NULL) - rc = SIGP_CC_NOT_OPERATIONAL; - else { - if (atomic_read(fi->local_int[cpu_addr]->cpuflags) - & CPUSTAT_RUNNING) { - /* running */ - rc = SIGP_CC_ORDER_CODE_ACCEPTED; - } else { - /* not running */ - *reg &= 0xffffffff00000000UL; - *reg |= SIGP_STATUS_NOT_RUNNING; - rc = SIGP_CC_STATUS_STORED; - } + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; + if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { + /* running */ + rc = SIGP_CC_ORDER_CODE_ACCEPTED; + } else { + /* not running */ + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_NOT_RUNNING; + rc = SIGP_CC_STATUS_STORED; } - spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, rc); @@ -365,26 +347,22 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, /* Test whether the destination CPU is available and not busy */ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) { - struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; int rc = SIGP_CC_ORDER_CODE_ACCEPTED; + struct kvm_vcpu *dst_vcpu = NULL; if (cpu_addr >= KVM_MAX_VCPUS) return SIGP_CC_NOT_OPERATIONAL; - spin_lock(&fi->lock); - li = fi->local_int[cpu_addr]; - if (li == NULL) { - rc = SIGP_CC_NOT_OPERATIONAL; - goto out; - } - + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + li = &dst_vcpu->arch.local_int; spin_lock_bh(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) rc = SIGP_CC_BUSY; spin_unlock_bh(&li->lock); -out: - spin_unlock(&fi->lock); + return rc; } -- cgit v1.2.3-55-g7522 From 84ec96a6150477b9509664557bc6ad4eaa21f72a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 13 Feb 2014 13:02:32 +0100 Subject: s390/airq: add support for irq ranges Add airq_iv_alloc and airq_iv_free to allocate and free consecutive ranges of irqs from the interrupt vector. Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/airq.h | 14 ++++++++-- drivers/s390/cio/airq.c | 66 +++++++++++++++++++++++++++----------------- 2 files changed, 53 insertions(+), 27 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index 4bbb5957ed1b..bd93ff6661b8 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -44,11 +44,21 @@ struct airq_iv { struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags); void airq_iv_release(struct airq_iv *iv); -unsigned long airq_iv_alloc_bit(struct airq_iv *iv); -void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit); +unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num); +void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num); unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start, unsigned long end); +static inline unsigned long airq_iv_alloc_bit(struct airq_iv *iv) +{ + return airq_iv_alloc(iv, 1); +} + +static inline void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit) +{ + airq_iv_free(iv, bit, 1); +} + static inline unsigned long airq_iv_end(struct airq_iv *iv) { return iv->end; diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index f055df0b167f..445564c790f6 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -186,55 +186,71 @@ void airq_iv_release(struct airq_iv *iv) EXPORT_SYMBOL(airq_iv_release); /** - * airq_iv_alloc_bit - allocate an irq bit from an interrupt vector + * airq_iv_alloc - allocate irq bits from an interrupt vector * @iv: pointer to an interrupt vector structure + * @num: number of consecutive irq bits to allocate * - * Returns the bit number of the allocated irq, or -1UL if no bit - * is available or the AIRQ_IV_ALLOC flag has not been specified + * Returns the bit number of the first irq in the allocated block of irqs, + * or -1UL if no bit is available or the AIRQ_IV_ALLOC flag has not been + * specified */ -unsigned long airq_iv_alloc_bit(struct airq_iv *iv) +unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num) { - unsigned long bit; + unsigned long bit, i; - if (!iv->avail) + if (!iv->avail || num == 0) return -1UL; spin_lock(&iv->lock); bit = find_first_bit_inv(iv->avail, iv->bits); - if (bit < iv->bits) { - clear_bit_inv(bit, iv->avail); - if (bit >= iv->end) - iv->end = bit + 1; - } else + while (bit + num <= iv->bits) { + for (i = 1; i < num; i++) + if (!test_bit_inv(bit + i, iv->avail)) + break; + if (i >= num) { + /* Found a suitable block of irqs */ + for (i = 0; i < num; i++) + clear_bit_inv(bit + i, iv->avail); + if (bit + num >= iv->end) + iv->end = bit + num + 1; + break; + } + bit = find_next_bit_inv(iv->avail, iv->bits, bit + i + 1); + } + if (bit + num > iv->bits) bit = -1UL; spin_unlock(&iv->lock); return bit; } -EXPORT_SYMBOL(airq_iv_alloc_bit); +EXPORT_SYMBOL(airq_iv_alloc); /** - * airq_iv_free_bit - free an irq bit of an interrupt vector + * airq_iv_free - free irq bits of an interrupt vector * @iv: pointer to interrupt vector structure - * @bit: number of the irq bit to free + * @bit: number of the first irq bit to free + * @num: number of consecutive irq bits to free */ -void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit) +void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num) { - if (!iv->avail) + unsigned long i; + + if (!iv->avail || num == 0) return; spin_lock(&iv->lock); - /* Clear (possibly left over) interrupt bit */ - clear_bit_inv(bit, iv->vector); - /* Make the bit position available again */ - set_bit_inv(bit, iv->avail); - if (bit == iv->end - 1) { + for (i = 0; i < num; i++) { + /* Clear (possibly left over) interrupt bit */ + clear_bit_inv(bit + i, iv->vector); + /* Make the bit positions available again */ + set_bit_inv(bit + i, iv->avail); + } + if (bit + num >= iv->end) { /* Find new end of bit-field */ - while (--iv->end > 0) - if (!test_bit_inv(iv->end - 1, iv->avail)) - break; + while (iv->end > 0 && !test_bit_inv(iv->end - 1, iv->avail)) + iv->end--; } spin_unlock(&iv->lock); } -EXPORT_SYMBOL(airq_iv_free_bit); +EXPORT_SYMBOL(airq_iv_free); /** * airq_iv_scan - scan interrupt vector for non-zero bits -- cgit v1.2.3-55-g7522 From 96b14536d935848cffd904f583f67c66169002d8 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 6 Feb 2013 10:23:39 +0100 Subject: virtio-ccw: virtio-ccw adapter interrupt support. Implement the new CCW_CMD_SET_IND_ADAPTER command and try to enable adapter interrupts for every device on the first startup. If the host does not support adapter interrupts, fall back to normal I/O interrupts. virtio-ccw adapter interrupts use the same isc as normal I/O subchannels and share a summary indicator for all devices sharing the same indicator area. Indicator bits for the individual virtqueues may be contained in the same indicator area for different devices. Signed-off-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/irq.h | 1 + arch/s390/kernel/irq.c | 1 + drivers/s390/kvm/virtio_ccw.c | 278 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 270 insertions(+), 10 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 5f8bcc5fe423..35f0faab5361 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -53,6 +53,7 @@ enum interruption_class { IRQIO_PCI, IRQIO_MSI, IRQIO_VIR, + IRQIO_VAI, NMI_NMI, CPU_RST, NR_ARCH_IRQS diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index bb27a262c44a..c288ef7e47b4 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -84,6 +84,7 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" }, [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" }, [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, + [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, }; diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index 413c6304d511..6a2b5fdcd552 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -1,7 +1,7 @@ /* * ccw based virtio transport * - * Copyright IBM Corp. 2012 + * Copyright IBM Corp. 2012, 2014 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -32,6 +32,8 @@ #include #include #include +#include +#include /* * virtio related functions @@ -58,6 +60,8 @@ struct virtio_ccw_device { unsigned long indicators; unsigned long indicators2; struct vq_config_block *config_block; + bool is_thinint; + void *airq_info; }; struct vq_info_block { @@ -72,15 +76,38 @@ struct virtio_feature_desc { __u8 index; } __packed; +struct virtio_thinint_area { + unsigned long summary_indicator; + unsigned long indicator; + u64 bit_nr; + u8 isc; +} __packed; + struct virtio_ccw_vq_info { struct virtqueue *vq; int num; void *queue; struct vq_info_block *info_block; + int bit_nr; struct list_head node; long cookie; }; +#define VIRTIO_AIRQ_ISC IO_SCH_ISC /* inherit from subchannel */ + +#define VIRTIO_IV_BITS (L1_CACHE_BYTES * 8) +#define MAX_AIRQ_AREAS 20 + +static int virtio_ccw_use_airq = 1; + +struct airq_info { + rwlock_t lock; + u8 summary_indicator; + struct airq_struct airq; + struct airq_iv *aiv; +}; +static struct airq_info *airq_areas[MAX_AIRQ_AREAS]; + #define CCW_CMD_SET_VQ 0x13 #define CCW_CMD_VDEV_RESET 0x33 #define CCW_CMD_SET_IND 0x43 @@ -91,6 +118,7 @@ struct virtio_ccw_vq_info { #define CCW_CMD_WRITE_CONF 0x21 #define CCW_CMD_WRITE_STATUS 0x31 #define CCW_CMD_READ_VQ_CONF 0x32 +#define CCW_CMD_SET_IND_ADAPTER 0x73 #define VIRTIO_CCW_DOING_SET_VQ 0x00010000 #define VIRTIO_CCW_DOING_RESET 0x00040000 @@ -102,6 +130,7 @@ struct virtio_ccw_vq_info { #define VIRTIO_CCW_DOING_SET_IND 0x01000000 #define VIRTIO_CCW_DOING_READ_VQ_CONF 0x02000000 #define VIRTIO_CCW_DOING_SET_CONF_IND 0x04000000 +#define VIRTIO_CCW_DOING_SET_IND_ADAPTER 0x08000000 #define VIRTIO_CCW_INTPARM_MASK 0xffff0000 static struct virtio_ccw_device *to_vc_device(struct virtio_device *vdev) @@ -109,6 +138,125 @@ static struct virtio_ccw_device *to_vc_device(struct virtio_device *vdev) return container_of(vdev, struct virtio_ccw_device, vdev); } +static void drop_airq_indicator(struct virtqueue *vq, struct airq_info *info) +{ + unsigned long i, flags; + + write_lock_irqsave(&info->lock, flags); + for (i = 0; i < airq_iv_end(info->aiv); i++) { + if (vq == (void *)airq_iv_get_ptr(info->aiv, i)) { + airq_iv_free_bit(info->aiv, i); + airq_iv_set_ptr(info->aiv, i, 0); + break; + } + } + write_unlock_irqrestore(&info->lock, flags); +} + +static void virtio_airq_handler(struct airq_struct *airq) +{ + struct airq_info *info = container_of(airq, struct airq_info, airq); + unsigned long ai; + + inc_irq_stat(IRQIO_VAI); + read_lock(&info->lock); + /* Walk through indicators field, summary indicator active. */ + for (ai = 0;;) { + ai = airq_iv_scan(info->aiv, ai, airq_iv_end(info->aiv)); + if (ai == -1UL) + break; + vring_interrupt(0, (void *)airq_iv_get_ptr(info->aiv, ai)); + } + info->summary_indicator = 0; + smp_wmb(); + /* Walk through indicators field, summary indicator not active. */ + for (ai = 0;;) { + ai = airq_iv_scan(info->aiv, ai, airq_iv_end(info->aiv)); + if (ai == -1UL) + break; + vring_interrupt(0, (void *)airq_iv_get_ptr(info->aiv, ai)); + } + read_unlock(&info->lock); +} + +static struct airq_info *new_airq_info(void) +{ + struct airq_info *info; + int rc; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return NULL; + rwlock_init(&info->lock); + info->aiv = airq_iv_create(VIRTIO_IV_BITS, AIRQ_IV_ALLOC | AIRQ_IV_PTR); + if (!info->aiv) { + kfree(info); + return NULL; + } + info->airq.handler = virtio_airq_handler; + info->airq.lsi_ptr = &info->summary_indicator; + info->airq.lsi_mask = 0xff; + info->airq.isc = VIRTIO_AIRQ_ISC; + rc = register_adapter_interrupt(&info->airq); + if (rc) { + airq_iv_release(info->aiv); + kfree(info); + return NULL; + } + return info; +} + +static void destroy_airq_info(struct airq_info *info) +{ + if (!info) + return; + + unregister_adapter_interrupt(&info->airq); + airq_iv_release(info->aiv); + kfree(info); +} + +static unsigned long get_airq_indicator(struct virtqueue *vqs[], int nvqs, + u64 *first, void **airq_info) +{ + int i, j; + struct airq_info *info; + unsigned long indicator_addr = 0; + unsigned long bit, flags; + + for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) { + if (!airq_areas[i]) + airq_areas[i] = new_airq_info(); + info = airq_areas[i]; + if (!info) + return 0; + write_lock_irqsave(&info->lock, flags); + bit = airq_iv_alloc(info->aiv, nvqs); + if (bit == -1UL) { + /* Not enough vacancies. */ + write_unlock_irqrestore(&info->lock, flags); + continue; + } + *first = bit; + *airq_info = info; + indicator_addr = (unsigned long)info->aiv->vector; + for (j = 0; j < nvqs; j++) { + airq_iv_set_ptr(info->aiv, bit + j, + (unsigned long)vqs[j]); + } + write_unlock_irqrestore(&info->lock, flags); + } + return indicator_addr; +} + +static void virtio_ccw_drop_indicators(struct virtio_ccw_device *vcdev) +{ + struct virtio_ccw_vq_info *info; + + list_for_each_entry(info, &vcdev->virtqueues, node) + drop_airq_indicator(info->vq, vcdev->airq_info); +} + static int doing_io(struct virtio_ccw_device *vcdev, __u32 flag) { unsigned long flags; @@ -145,6 +293,51 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev, return ret ? ret : vcdev->err; } +static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev, + struct ccw1 *ccw) +{ + int ret; + unsigned long *indicatorp = NULL; + struct virtio_thinint_area *thinint_area = NULL; + struct airq_info *airq_info = vcdev->airq_info; + + if (vcdev->is_thinint) { + thinint_area = kzalloc(sizeof(*thinint_area), + GFP_DMA | GFP_KERNEL); + if (!thinint_area) + return; + thinint_area->summary_indicator = + (unsigned long) &airq_info->summary_indicator; + thinint_area->isc = VIRTIO_AIRQ_ISC; + ccw->cmd_code = CCW_CMD_SET_IND_ADAPTER; + ccw->count = sizeof(*thinint_area); + ccw->cda = (__u32)(unsigned long) thinint_area; + } else { + indicatorp = kmalloc(sizeof(&vcdev->indicators), + GFP_DMA | GFP_KERNEL); + if (!indicatorp) + return; + *indicatorp = 0; + ccw->cmd_code = CCW_CMD_SET_IND; + ccw->count = sizeof(vcdev->indicators); + ccw->cda = (__u32)(unsigned long) indicatorp; + } + /* Deregister indicators from host. */ + vcdev->indicators = 0; + ccw->flags = 0; + ret = ccw_io_helper(vcdev, ccw, + vcdev->is_thinint ? + VIRTIO_CCW_DOING_SET_IND_ADAPTER : + VIRTIO_CCW_DOING_SET_IND); + if (ret && (ret != -ENODEV)) + dev_info(&vcdev->cdev->dev, + "Failed to deregister indicators (%d)\n", ret); + else if (vcdev->is_thinint) + virtio_ccw_drop_indicators(vcdev); + kfree(indicatorp); + kfree(thinint_area); +} + static inline long do_kvm_notify(struct subchannel_id schid, unsigned long queue_index, long cookie) @@ -232,11 +425,13 @@ static void virtio_ccw_del_vqs(struct virtio_device *vdev) { struct virtqueue *vq, *n; struct ccw1 *ccw; + struct virtio_ccw_device *vcdev = to_vc_device(vdev); ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); if (!ccw) return; + virtio_ccw_drop_indicator(vcdev, ccw); list_for_each_entry_safe(vq, n, &vdev->vqs, list) virtio_ccw_del_vq(vq, ccw); @@ -326,6 +521,54 @@ out_err: return ERR_PTR(err); } +static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev, + struct virtqueue *vqs[], int nvqs, + struct ccw1 *ccw) +{ + int ret; + struct virtio_thinint_area *thinint_area = NULL; + struct airq_info *info; + + thinint_area = kzalloc(sizeof(*thinint_area), GFP_DMA | GFP_KERNEL); + if (!thinint_area) { + ret = -ENOMEM; + goto out; + } + /* Try to get an indicator. */ + thinint_area->indicator = get_airq_indicator(vqs, nvqs, + &thinint_area->bit_nr, + &vcdev->airq_info); + if (!thinint_area->indicator) { + ret = -ENOSPC; + goto out; + } + info = vcdev->airq_info; + thinint_area->summary_indicator = + (unsigned long) &info->summary_indicator; + thinint_area->isc = VIRTIO_AIRQ_ISC; + ccw->cmd_code = CCW_CMD_SET_IND_ADAPTER; + ccw->flags = CCW_FLAG_SLI; + ccw->count = sizeof(*thinint_area); + ccw->cda = (__u32)(unsigned long)thinint_area; + ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND_ADAPTER); + if (ret) { + if (ret == -EOPNOTSUPP) { + /* + * The host does not support adapter interrupts + * for virtio-ccw, stop trying. + */ + virtio_ccw_use_airq = 0; + pr_info("Adapter interrupts unsupported on host\n"); + } else + dev_warn(&vcdev->cdev->dev, + "enabling adapter interrupts = %d\n", ret); + virtio_ccw_drop_indicators(vcdev); + } +out: + kfree(thinint_area); + return ret; +} + static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], @@ -355,15 +598,23 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, if (!indicatorp) goto out; *indicatorp = (unsigned long) &vcdev->indicators; - /* Register queue indicators with host. */ - vcdev->indicators = 0; - ccw->cmd_code = CCW_CMD_SET_IND; - ccw->flags = 0; - ccw->count = sizeof(vcdev->indicators); - ccw->cda = (__u32)(unsigned long) indicatorp; - ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND); - if (ret) - goto out; + if (vcdev->is_thinint) { + ret = virtio_ccw_register_adapter_ind(vcdev, vqs, nvqs, ccw); + if (ret) + /* no error, just fall back to legacy interrupts */ + vcdev->is_thinint = 0; + } + if (!vcdev->is_thinint) { + /* Register queue indicators with host. */ + vcdev->indicators = 0; + ccw->cmd_code = CCW_CMD_SET_IND; + ccw->flags = 0; + ccw->count = sizeof(vcdev->indicators); + ccw->cda = (__u32)(unsigned long) indicatorp; + ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND); + if (ret) + goto out; + } /* Register indicators2 with host for config changes */ *indicatorp = (unsigned long) &vcdev->indicators2; vcdev->indicators2 = 0; @@ -665,6 +916,7 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, case VIRTIO_CCW_DOING_SET_CONF_IND: case VIRTIO_CCW_DOING_RESET: case VIRTIO_CCW_DOING_READ_VQ_CONF: + case VIRTIO_CCW_DOING_SET_IND_ADAPTER: vcdev->curr_io &= ~activity; wake_up(&vcdev->wait_q); break; @@ -795,6 +1047,8 @@ static int virtio_ccw_online(struct ccw_device *cdev) goto out_free; } + vcdev->is_thinint = virtio_ccw_use_airq; /* at least try */ + vcdev->vdev.dev.parent = &cdev->dev; vcdev->vdev.dev.release = virtio_ccw_release_dev; vcdev->vdev.config = &virtio_ccw_config_ops; @@ -956,6 +1210,10 @@ module_init(virtio_ccw_init); static void __exit virtio_ccw_exit(void) { + int i; + ccw_driver_unregister(&virtio_ccw_driver); + for (i = 0; i < MAX_AIRQ_AREAS; i++) + destroy_airq_info(airq_areas[i]); } module_exit(virtio_ccw_exit); -- cgit v1.2.3-55-g7522 From fed495d25e0137028c37678a14cd1d8466c02dd3 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Mon, 10 Mar 2014 15:23:34 +0100 Subject: KVM: s390: Removing untriggerable BUG_ONs The BUG_ON in kvm-s390.c is unreachable, as we get the vcpu per common code, which itself does this from the private_data field of the file descriptor, and there is no KVM_UNCREATE_VCPU. The __{set,unset}_cpu_idle BUG_ONs are not triggerable because the vcpu creation code already checks against KVM_MAX_VCPUS. Signed-off-by: Dominik Dingel Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 2 -- arch/s390/kvm/kvm-s390.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1d0f9d532c0b..79d2e4fa9f9c 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -114,14 +114,12 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, static void __set_cpu_idle(struct kvm_vcpu *vcpu) { - BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); } static void __unset_cpu_idle(struct kvm_vcpu *vcpu) { - BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9136f8d40850..62683576f8e2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -949,8 +949,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); - BUG_ON(kvm_get_vcpu(vcpu->kvm, vcpu->vcpu_id) == NULL); - switch (kvm_run->exit_reason) { case KVM_EXIT_S390_SIEIC: case KVM_EXIT_UNKNOWN: -- cgit v1.2.3-55-g7522 From 2955c83f72801245afd0fe5c560cc75b82bea9aa Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Mar 2014 16:01:38 +0100 Subject: KVM: s390: Optimize ucontrol path Since commit 7c470539c95630c1f2a10f109e96f249730b75eb (s390/kvm: avoid automatic sie reentry) we will run through the C code of KVM on host interrupts instead of just reentering the guest. This will result in additional ucontrol exits (at least HZ per second). Let handle a 0 intercept in the kernel and dont return to userspace, even if in ucontrol mode. Signed-off-by: Christian Borntraeger Reviewed-by: Cornelia Huck CC: stable@vger.kernel.org --- arch/s390/kvm/kvm-s390.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 62683576f8e2..7337c577e949 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -896,7 +896,8 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (rc == 0) { if (kvm_is_ucontrol(vcpu->kvm)) - rc = -EOPNOTSUPP; + /* Don't exit for host interrupts. */ + rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; else rc = kvm_handle_sie_intercept(vcpu); } -- cgit v1.2.3-55-g7522 From d938dc55225a7212e7f31c5a8571da304cc3de16 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 23 Oct 2013 18:26:34 +0200 Subject: KVM: Add per-vm capability enablement. Allow KVM_ENABLE_CAP to act on a vm as well as on a vcpu. This makes more sense when the caller wants to enable a vm-related capability. s390 will be the first user; wire it up. Reviewed-by: Thomas Huth Reviewed-by: Christian Borntraeger Signed-off-by: Cornelia Huck --- Documentation/virtual/kvm/api.txt | 6 ++++-- arch/s390/kvm/kvm-s390.c | 24 ++++++++++++++++++++++++ include/uapi/linux/kvm.h | 5 +++++ 3 files changed, 33 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4714f282a43e..faf6fe9772c7 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -932,9 +932,9 @@ documentation when it pops into existence). 4.37 KVM_ENABLE_CAP -Capability: KVM_CAP_ENABLE_CAP +Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM Architectures: ppc, s390 -Type: vcpu ioctl +Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM) Parameters: struct kvm_enable_cap (in) Returns: 0 on success; -1 on error @@ -965,6 +965,8 @@ function properly, this is the place to put them. __u8 pad[64]; }; +The vcpu ioctl should be used for vcpu-specific capabilities, the vm ioctl +for vm-wide capabilities. 4.38 KVM_GET_MP_STATE diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7337c577e949..9f1e99f12d4f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -159,6 +159,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_S390_CSS_SUPPORT: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: + case KVM_CAP_ENABLE_CAP_VM: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -187,6 +188,21 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, return 0; } +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + default: + r = -EINVAL; + break; + } + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -204,6 +220,14 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_s390_inject_vm(kvm, &s390int); break; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + break; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } default: r = -ENOTTY; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a7518be31d53..46ea1b470c76 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -741,6 +741,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_EXT_EMUL_CPUID 95 #define KVM_CAP_HYPERV_TIME 96 #define KVM_CAP_IOAPIC_POLARITY_IGNORED 97 +#define KVM_CAP_ENABLE_CAP_VM 98 #ifdef KVM_CAP_IRQ_ROUTING @@ -1076,6 +1077,10 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_DEBUGREGS */ #define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) #define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) +/* + * vcpu version available with KVM_ENABLE_CAP + * vm version available with KVM_CAP_ENABLE_CAP_VM + */ #define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) /* Available with KVM_CAP_XSAVE */ #define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave) -- cgit v1.2.3-55-g7522 From 841b91c584b6d1e2a2cb508bd2d0236cd37e1750 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 15 Jul 2013 13:36:01 +0200 Subject: KVM: s390: adapter interrupt sources Add a new interface to register/deregister sources of adapter interrupts identified by an unique id via the flic. Adapters may also be maskable and carry a list of pinned pages. These adapters will be used by irq routing later. Acked-by: Christian Borntraeger Signed-off-by: Cornelia Huck --- Documentation/virtual/kvm/devices/s390_flic.txt | 45 ++++++ arch/s390/include/asm/kvm_host.h | 23 ++++ arch/s390/include/uapi/asm/kvm.h | 22 +++ arch/s390/kvm/interrupt.c | 173 +++++++++++++++++++++++- arch/s390/kvm/kvm-s390.c | 1 + arch/s390/kvm/kvm-s390.h | 2 + 6 files changed, 265 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt index 410fa673e5b6..4ceef53164b0 100644 --- a/Documentation/virtual/kvm/devices/s390_flic.txt +++ b/Documentation/virtual/kvm/devices/s390_flic.txt @@ -12,6 +12,7 @@ FLIC provides support to - inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS) - purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS) - enable/disable for the guest transparent async page faults +- register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*) Groups: KVM_DEV_FLIC_ENQUEUE @@ -44,3 +45,47 @@ Groups: Disables async page faults for the guest and waits until already pending async page faults are done. This is necessary to trigger a completion interrupt for every init interrupt before migrating the interrupt list. + + KVM_DEV_FLIC_ADAPTER_REGISTER + Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter + describing the adapter to register: + +struct kvm_s390_io_adapter { + __u32 id; + __u8 isc; + __u8 maskable; + __u8 swap; + __u8 pad; +}; + + id contains the unique id for the adapter, isc the I/O interruption subclass + to use, maskable whether this adapter may be masked (interrupts turned off) + and swap whether the indicators need to be byte swapped. + + + KVM_DEV_FLIC_ADAPTER_MODIFY + Modifies attributes of an existing I/O adapter interrupt source. Takes + a kvm_s390_io_adapter_req specifiying the adapter and the operation: + +struct kvm_s390_io_adapter_req { + __u32 id; + __u8 type; + __u8 mask; + __u16 pad0; + __u64 addr; +}; + + id specifies the adapter and type the operation. The supported operations + are: + + KVM_S390_IO_ADAPTER_MASK + mask or unmask the adapter, as specified in mask + + KVM_S390_IO_ADAPTER_MAP + perform a gmap translation for the guest address provided in addr, + pin a userspace page for the translated address and add it to the + list of mappings + + KVM_S390_IO_ADAPTER_UNMAP + release a userspace page for the translated address specified in addr + from the list of mappings diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 734d302ba389..0d5235262707 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -19,6 +19,7 @@ #include #include #include +#include #define KVM_MAX_VCPUS 64 #define KVM_USER_MEM_SLOTS 32 @@ -245,6 +246,27 @@ struct kvm_vm_stat { struct kvm_arch_memory_slot { }; +struct s390_map_info { + struct list_head list; + __u64 guest_addr; + __u64 addr; + struct page *page; +}; + +struct s390_io_adapter { + unsigned int id; + int isc; + bool maskable; + bool masked; + bool swap; + struct rw_semaphore maps_lock; + struct list_head maps; + atomic_t nr_maps; +}; + +#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) +#define MAX_S390_ADAPTER_MAPS 256 + struct kvm_arch{ struct sca_block *sca; debug_info_t *dbf; @@ -252,6 +274,7 @@ struct kvm_arch{ struct kvm_device *flic; struct gmap *gmap; int css_support; + struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; }; #define KVM_HVA_ERR_BAD (-1UL) diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 2f0ade24f96a..c003c6a73b1e 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -22,6 +22,8 @@ #define KVM_DEV_FLIC_CLEAR_IRQS 3 #define KVM_DEV_FLIC_APF_ENABLE 4 #define KVM_DEV_FLIC_APF_DISABLE_WAIT 5 +#define KVM_DEV_FLIC_ADAPTER_REGISTER 6 +#define KVM_DEV_FLIC_ADAPTER_MODIFY 7 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. @@ -32,6 +34,26 @@ #define KVM_S390_MAX_FLOAT_IRQS 266250 #define KVM_S390_FLIC_MAX_BUFFER 0x2000000 +struct kvm_s390_io_adapter { + __u32 id; + __u8 isc; + __u8 maskable; + __u8 swap; + __u8 pad; +}; + +#define KVM_S390_IO_ADAPTER_MASK 1 +#define KVM_S390_IO_ADAPTER_MAP 2 +#define KVM_S390_IO_ADAPTER_UNMAP 3 + +struct kvm_s390_io_adapter_req { + __u32 id; + __u8 type; + __u8 mask; + __u16 pad0; + __u64 addr; +}; + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* general purpose regs for s390 */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 79d2e4fa9f9c..7ecef5a18e25 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1,7 +1,7 @@ /* * handling kvm guest interrupts * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008,2014 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -1054,6 +1054,171 @@ static int enqueue_floating_irq(struct kvm_device *dev, return r; } +static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id) +{ + if (id >= MAX_S390_IO_ADAPTERS) + return NULL; + return kvm->arch.adapters[id]; +} + +static int register_io_adapter(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct s390_io_adapter *adapter; + struct kvm_s390_io_adapter adapter_info; + + if (copy_from_user(&adapter_info, + (void __user *)attr->addr, sizeof(adapter_info))) + return -EFAULT; + + if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) || + (dev->kvm->arch.adapters[adapter_info.id] != NULL)) + return -EINVAL; + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) + return -ENOMEM; + + INIT_LIST_HEAD(&adapter->maps); + init_rwsem(&adapter->maps_lock); + atomic_set(&adapter->nr_maps, 0); + adapter->id = adapter_info.id; + adapter->isc = adapter_info.isc; + adapter->maskable = adapter_info.maskable; + adapter->masked = false; + adapter->swap = adapter_info.swap; + dev->kvm->arch.adapters[adapter->id] = adapter; + + return 0; +} + +int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked) +{ + int ret; + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + + if (!adapter || !adapter->maskable) + return -EINVAL; + ret = adapter->masked; + adapter->masked = masked; + return ret; +} + +static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr) +{ + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + struct s390_map_info *map; + int ret; + + if (!adapter || !addr) + return -EINVAL; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + ret = -ENOMEM; + goto out; + } + INIT_LIST_HEAD(&map->list); + map->guest_addr = addr; + map->addr = gmap_translate(addr, kvm->arch.gmap); + if (map->addr == -EFAULT) { + ret = -EFAULT; + goto out; + } + ret = get_user_pages_fast(map->addr, 1, 1, &map->page); + if (ret < 0) + goto out; + BUG_ON(ret != 1); + down_write(&adapter->maps_lock); + if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) { + list_add_tail(&map->list, &adapter->maps); + ret = 0; + } else { + put_page(map->page); + ret = -EINVAL; + } + up_write(&adapter->maps_lock); +out: + if (ret) + kfree(map); + return ret; +} + +static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr) +{ + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + struct s390_map_info *map, *tmp; + int found = 0; + + if (!adapter || !addr) + return -EINVAL; + + down_write(&adapter->maps_lock); + list_for_each_entry_safe(map, tmp, &adapter->maps, list) { + if (map->guest_addr == addr) { + found = 1; + atomic_dec(&adapter->nr_maps); + list_del(&map->list); + put_page(map->page); + kfree(map); + break; + } + } + up_write(&adapter->maps_lock); + + return found ? 0 : -EINVAL; +} + +void kvm_s390_destroy_adapters(struct kvm *kvm) +{ + int i; + struct s390_map_info *map, *tmp; + + for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) { + if (!kvm->arch.adapters[i]) + continue; + list_for_each_entry_safe(map, tmp, + &kvm->arch.adapters[i]->maps, list) { + list_del(&map->list); + put_page(map->page); + kfree(map); + } + kfree(kvm->arch.adapters[i]); + } +} + +static int modify_io_adapter(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct kvm_s390_io_adapter_req req; + struct s390_io_adapter *adapter; + int ret; + + if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) + return -EFAULT; + + adapter = get_io_adapter(dev->kvm, req.id); + if (!adapter) + return -EINVAL; + switch (req.type) { + case KVM_S390_IO_ADAPTER_MASK: + ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask); + if (ret > 0) + ret = 0; + break; + case KVM_S390_IO_ADAPTER_MAP: + ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr); + break; + case KVM_S390_IO_ADAPTER_UNMAP: + ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr); + break; + default: + ret = -EINVAL; + } + + return ret; +} + static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r = 0; @@ -1082,6 +1247,12 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) kvm_for_each_vcpu(i, vcpu, dev->kvm) kvm_clear_async_pf_completion_queue(vcpu); break; + case KVM_DEV_FLIC_ADAPTER_REGISTER: + r = register_io_adapter(dev, attr); + break; + case KVM_DEV_FLIC_ADAPTER_MODIFY: + r = modify_io_adapter(dev, attr); + break; default: r = -EINVAL; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9f1e99f12d4f..2e6fbb0b4f68 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -343,6 +343,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) debug_unregister(kvm->arch.dbf); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); + kvm_s390_destroy_adapters(kvm); } /* Section: vcpu related */ diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index ed4750a5bc3c..5502cc951868 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -136,6 +136,7 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); +int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); /* implemented in priv.c */ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); @@ -162,5 +163,6 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); /* implemented in interrupt.c */ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int psw_extint_disabled(struct kvm_vcpu *vcpu); +void kvm_s390_destroy_adapters(struct kvm *kvm); #endif -- cgit v1.2.3-55-g7522 From 84223598778ba08041f4297fda485df83414d57e Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 15 Jul 2013 13:36:01 +0200 Subject: KVM: s390: irq routing for adapter interrupts. Introduce a new interrupt class for s390 adapter interrupts and enable irqfds for s390. This is depending on a new s390 specific vm capability, KVM_CAP_S390_IRQCHIP, that needs to be enabled by userspace. Acked-by: Christian Borntraeger Signed-off-by: Cornelia Huck --- Documentation/virtual/kvm/api.txt | 21 +++++-- arch/s390/include/asm/kvm_host.h | 9 +++ arch/s390/kvm/Kconfig | 2 + arch/s390/kvm/Makefile | 2 +- arch/s390/kvm/interrupt.c | 121 ++++++++++++++++++++++++++++++++++++++ arch/s390/kvm/irq.h | 22 +++++++ arch/s390/kvm/kvm-s390.c | 17 ++++++ include/linux/kvm_host.h | 9 +++ include/uapi/linux/kvm.h | 11 ++++ 9 files changed, 209 insertions(+), 5 deletions(-) create mode 100644 arch/s390/kvm/irq.h (limited to 'arch/s390') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index faf6fe9772c7..2cb1640a90ad 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -586,8 +586,8 @@ struct kvm_fpu { 4.24 KVM_CREATE_IRQCHIP -Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64, ARM, arm64 +Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390) +Architectures: x86, ia64, ARM, arm64, s390 Type: vm ioctl Parameters: none Returns: 0 on success, -1 on error @@ -596,7 +596,10 @@ Creates an interrupt controller model in the kernel. On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is -created. +created. On s390, a dummy irq routing table is created. + +Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled +before KVM_CREATE_IRQCHIP can be used. 4.25 KVM_IRQ_LINE @@ -1336,7 +1339,7 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. 4.52 KVM_SET_GSI_ROUTING Capability: KVM_CAP_IRQ_ROUTING -Architectures: x86 ia64 +Architectures: x86 ia64 s390 Type: vm ioctl Parameters: struct kvm_irq_routing (in) Returns: 0 on success, -1 on error @@ -1359,6 +1362,7 @@ struct kvm_irq_routing_entry { union { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; + struct kvm_irq_routing_s390_adapter adapter; __u32 pad[8]; } u; }; @@ -1366,6 +1370,7 @@ struct kvm_irq_routing_entry { /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 +#define KVM_IRQ_ROUTING_S390_ADAPTER 3 No flags are specified so far, the corresponding field must be set to zero. @@ -1381,6 +1386,14 @@ struct kvm_irq_routing_msi { __u32 pad; }; +struct kvm_irq_routing_s390_adapter { + __u64 ind_addr; + __u64 summary_addr; + __u64 ind_offset; + __u32 summary_offset; + __u32 adapter_id; +}; + 4.53 KVM_ASSIGN_SET_MSIX_NR diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 0d5235262707..dd3933754d23 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -24,6 +24,14 @@ #define KVM_MAX_VCPUS 64 #define KVM_USER_MEM_SLOTS 32 +/* + * These seem to be used for allocating ->chip in the routing table, + * which we don't use. 4096 is an out-of-thin-air value. If we need + * to look at ->chip later on, we'll need to revisit this. + */ +#define KVM_NR_IRQCHIPS 1 +#define KVM_IRQCHIP_NUM_PINS 4096 + struct sca_entry { atomic_t scn; __u32 reserved; @@ -274,6 +282,7 @@ struct kvm_arch{ struct kvm_device *flic; struct gmap *gmap; int css_support; + int use_irqchip; struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; }; diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index c8bacbcd2e5b..10d529ac9821 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -25,6 +25,8 @@ config KVM select HAVE_KVM_EVENTFD select KVM_ASYNC_PF select KVM_ASYNC_PF_SYNC + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index a47d2c355f68..d3adb37e93a4 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -7,7 +7,7 @@ # as published by the Free Software Foundation. KVM := ../../../virt/kvm -common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o +common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 7ecef5a18e25..2e2814eceb85 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -1284,3 +1285,123 @@ struct kvm_device_ops kvm_flic_ops = { .create = flic_create, .destroy = flic_destroy, }; + +static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap) +{ + unsigned long bit; + + bit = bit_nr + (addr % PAGE_SIZE) * 8; + + return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit; +} + +static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter, + u64 addr) +{ + struct s390_map_info *map; + + if (!adapter) + return NULL; + + list_for_each_entry(map, &adapter->maps, list) { + if (map->guest_addr == addr) + return map; + } + return NULL; +} + +static int adapter_indicators_set(struct kvm *kvm, + struct s390_io_adapter *adapter, + struct kvm_s390_adapter_int *adapter_int) +{ + unsigned long bit; + int summary_set, idx; + struct s390_map_info *info; + void *map; + + info = get_map_info(adapter, adapter_int->ind_addr); + if (!info) + return -1; + map = page_address(info->page); + bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap); + set_bit(bit, map); + idx = srcu_read_lock(&kvm->srcu); + mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT); + set_page_dirty_lock(info->page); + info = get_map_info(adapter, adapter_int->summary_addr); + if (!info) { + srcu_read_unlock(&kvm->srcu, idx); + return -1; + } + map = page_address(info->page); + bit = get_ind_bit(info->addr, adapter_int->summary_offset, + adapter->swap); + summary_set = test_and_set_bit(bit, map); + mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT); + set_page_dirty_lock(info->page); + srcu_read_unlock(&kvm->srcu, idx); + return summary_set ? 0 : 1; +} + +/* + * < 0 - not injected due to error + * = 0 - coalesced, summary indicator already active + * > 0 - injected interrupt + */ +static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + int ret; + struct s390_io_adapter *adapter; + + /* We're only interested in the 0->1 transition. */ + if (!level) + return 0; + adapter = get_io_adapter(kvm, e->adapter.adapter_id); + if (!adapter) + return -1; + down_read(&adapter->maps_lock); + ret = adapter_indicators_set(kvm, adapter, &e->adapter); + up_read(&adapter->maps_lock); + if ((ret > 0) && !adapter->masked) { + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_IO(1, 0, 0, 0), + .parm = 0, + .parm64 = (adapter->isc << 27) | 0x80000000, + }; + ret = kvm_s390_inject_vm(kvm, &s390int); + if (ret == 0) + ret = 1; + } + return ret; +} + +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int ret; + + switch (ue->type) { + case KVM_IRQ_ROUTING_S390_ADAPTER: + e->set = set_adapter_int; + e->adapter.summary_addr = ue->u.adapter.summary_addr; + e->adapter.ind_addr = ue->u.adapter.ind_addr; + e->adapter.summary_offset = ue->u.adapter.summary_offset; + e->adapter.ind_offset = ue->u.adapter.ind_offset; + e->adapter.adapter_id = ue->u.adapter.adapter_id; + ret = 0; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + return -EINVAL; +} diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h new file mode 100644 index 000000000000..d98e4159643d --- /dev/null +++ b/arch/s390/kvm/irq.h @@ -0,0 +1,22 @@ +/* + * s390 irqchip routines + * + * Copyright IBM Corp. 2014 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Cornelia Huck + */ +#ifndef __KVM_IRQ_H +#define __KVM_IRQ_H + +#include + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + return 1; +} + +#endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2e6fbb0b4f68..ce5b659ec531 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -196,6 +196,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) return -EINVAL; switch (cap->cap) { + case KVM_CAP_S390_IRQCHIP: + kvm->arch.use_irqchip = 1; + r = 0; + break; default: r = -EINVAL; break; @@ -228,6 +232,18 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_enable_cap(kvm, &cap); break; } + case KVM_CREATE_IRQCHIP: { + struct kvm_irq_routing_entry routing; + + r = -EINVAL; + if (kvm->arch.use_irqchip) { + /* Set up dummy routing. */ + memset(&routing, 0, sizeof(routing)); + kvm_set_irq_routing(kvm, &routing, 0, 0); + r = 0; + } + break; + } default: r = -ENOTTY; } @@ -284,6 +300,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) } kvm->arch.css_support = 0; + kvm->arch.use_irqchip = 0; return 0; out_nogmap: diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9816b68b085f..da7510b4c6ad 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -297,6 +297,14 @@ static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memsl return ALIGN(memslot->npages, BITS_PER_LONG) / 8; } +struct kvm_s390_adapter_int { + u64 ind_addr; + u64 summary_addr; + u64 ind_offset; + u32 summary_offset; + u32 adapter_id; +}; + struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; @@ -309,6 +317,7 @@ struct kvm_kernel_irq_routing_entry { unsigned pin; } irqchip; struct msi_msg msi; + struct kvm_s390_adapter_int adapter; }; struct hlist_node link; }; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 46ea1b470c76..a8f4ee5d2e82 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -742,6 +742,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_HYPERV_TIME 96 #define KVM_CAP_IOAPIC_POLARITY_IGNORED 97 #define KVM_CAP_ENABLE_CAP_VM 98 +#define KVM_CAP_S390_IRQCHIP 99 #ifdef KVM_CAP_IRQ_ROUTING @@ -757,9 +758,18 @@ struct kvm_irq_routing_msi { __u32 pad; }; +struct kvm_irq_routing_s390_adapter { + __u64 ind_addr; + __u64 summary_addr; + __u64 ind_offset; + __u32 summary_offset; + __u32 adapter_id; +}; + /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 +#define KVM_IRQ_ROUTING_S390_ADAPTER 3 struct kvm_irq_routing_entry { __u32 gsi; @@ -769,6 +779,7 @@ struct kvm_irq_routing_entry { union { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; + struct kvm_irq_routing_s390_adapter adapter; __u32 pad[8]; } u; }; -- cgit v1.2.3-55-g7522 From f6c137ff00a478ae619deea8650829dd2f8e71b9 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 19 Mar 2014 11:18:29 +0100 Subject: KVM: s390: randomize sca address We allocate a page for the 2k sca, so lets use the space to improve hit rate of some internal cpu caches. No need to change the freeing of the page, as this will shift away the page offset bits anyway. Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand --- arch/s390/kvm/kvm-s390.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7337c577e949..a02979f1e1af 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -215,6 +215,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int rc; char debug_name[16]; + static unsigned long sca_offset; rc = -EINVAL; #ifdef CONFIG_KVM_S390_UCONTROL @@ -236,6 +237,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; + spin_lock(&kvm_lock); + sca_offset = (sca_offset + 16) & 0x7f0; + kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); + spin_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); -- cgit v1.2.3-55-g7522 From 609433fbed4f25e11e8b058ab7e9478b212879a9 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Tue, 18 Mar 2014 16:34:18 +0100 Subject: KVM: s390: fix calculation of idle_mask array size We need BITS_TO_LONGS, not sizeof(long) to calculate the correct size. idle_mask is a bitmask, each bit representing the state of a cpu. The desired outcome is an array of unsigned long fields that can fit KVM_MAX_VCPUS bits. We should not use sizeof(long) which returnes the size in bytes, but BITS_TO_LONGS Signed-off-by: Jens Freimann Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 734d302ba389..c36cd35e03f3 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -213,8 +213,7 @@ struct kvm_s390_float_interrupt { struct list_head list; atomic_t active; int next_rr_cpu; - unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1) - / sizeof(long)]; + unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; unsigned int irq_count; }; -- cgit v1.2.3-55-g7522 From 91880d07fc9b2b6b7a726765039897af99d9ca78 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 20 Mar 2014 13:20:46 +0100 Subject: KVM: s390: Fix possible memory leak in SIGP functions When kvm_get_vcpu() returned NULL for the destination CPU in __sigp_emergency() or __sigp_external_call(), the memory for the "inti" structure was not released anymore. This patch fixes this issue by moving the check for !dst_vcpu before the kzalloc() call. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/sigp.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 3fe44c441609..26caeb530a78 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -58,7 +58,9 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) struct kvm_s390_interrupt_info *inti; struct kvm_vcpu *dst_vcpu = NULL; - if (cpu_addr >= KVM_MAX_VCPUS) + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; inti = kzalloc(sizeof(*inti), GFP_KERNEL); @@ -68,9 +70,6 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) inti->type = KVM_S390_INT_EMERGENCY; inti->emerg.code = vcpu->vcpu_id; - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; spin_lock_bh(&li->lock); list_add_tail(&inti->list, &li->list); @@ -121,7 +120,9 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) struct kvm_s390_interrupt_info *inti; struct kvm_vcpu *dst_vcpu = NULL; - if (cpu_addr >= KVM_MAX_VCPUS) + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; inti = kzalloc(sizeof(*inti), GFP_KERNEL); @@ -131,9 +132,6 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) inti->type = KVM_S390_INT_EXTERNAL_CALL; inti->extcall.code = vcpu->vcpu_id; - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); - if (!dst_vcpu) - return SIGP_CC_NOT_OPERATIONAL; li = &dst_vcpu->arch.local_int; spin_lock_bh(&li->lock); list_add_tail(&inti->list, &li->list); -- cgit v1.2.3-55-g7522 From 2ed10cc15e7edf2daf22ce807a877a1266e97711 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Tue, 11 Feb 2014 13:48:07 +0100 Subject: KVM: s390: clear local interrupts at cpu initial reset Empty list of local interrupts when vcpu goes through initial reset to provide a clean state Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 14 ++++++++++++++ arch/s390/kvm/kvm-s390.c | 1 + arch/s390/kvm/kvm-s390.h | 1 + 3 files changed, 16 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 79d2e4fa9f9c..05bffd74961f 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -509,6 +509,20 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) return HRTIMER_NORESTART; } +void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_interrupt_info *n, *inti = NULL; + + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + list_del(&inti->list); + kfree(inti); + } + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); +} + void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index a02979f1e1af..83b79447de55 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -395,6 +395,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); + kvm_s390_clear_local_irqs(vcpu); } int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index ed4750a5bc3c..6311170843b6 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -129,6 +129,7 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); +void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); int __must_check kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int); int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, -- cgit v1.2.3-55-g7522