Diffstat (limited to 'target/s390x/tcg')
-rw-r--r-- | target/s390x/tcg/cc_helper.c | 538
-rw-r--r-- | target/s390x/tcg/crypto_helper.c | 61
-rw-r--r-- | target/s390x/tcg/excp_helper.c | 641
-rw-r--r-- | target/s390x/tcg/fpu_helper.c | 976
-rw-r--r-- | target/s390x/tcg/insn-data.def | 1398
-rw-r--r-- | target/s390x/tcg/insn-format.def | 81
-rw-r--r-- | target/s390x/tcg/int_helper.c | 148
-rw-r--r-- | target/s390x/tcg/mem_helper.c | 3008
-rw-r--r-- | target/s390x/tcg/meson.build | 14
-rw-r--r-- | target/s390x/tcg/misc_helper.c | 785
-rw-r--r-- | target/s390x/tcg/s390-tod.h | 29
-rw-r--r-- | target/s390x/tcg/tcg_s390x.h | 24
-rw-r--r-- | target/s390x/tcg/translate.c | 6672
-rw-r--r-- | target/s390x/tcg/translate_vx.c.inc | 3109
-rw-r--r-- | target/s390x/tcg/vec.h | 141
-rw-r--r-- | target/s390x/tcg/vec_fpu_helper.c | 1072
-rw-r--r-- | target/s390x/tcg/vec_helper.c | 214
-rw-r--r-- | target/s390x/tcg/vec_int_helper.c | 587
-rw-r--r-- | target/s390x/tcg/vec_string_helper.c | 473
19 files changed, 19971 insertions, 0 deletions
diff --git a/target/s390x/tcg/cc_helper.c b/target/s390x/tcg/cc_helper.c new file mode 100644 index 0000000000..c2c96c3a3c --- /dev/null +++ b/target/s390x/tcg/cc_helper.c @@ -0,0 +1,538 @@ +/* + * S/390 condition code helper routines + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2009 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "tcg_s390x.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "qemu/host-utils.h" + +/* #define DEBUG_HELPER */ +#ifdef DEBUG_HELPER +#define HELPER_LOG(x...) qemu_log(x) +#else +#define HELPER_LOG(x...) +#endif + +static uint32_t cc_calc_ltgt_32(int32_t src, int32_t dst) +{ + if (src == dst) { + return 0; + } else if (src < dst) { + return 1; + } else { + return 2; + } +} + +static uint32_t cc_calc_ltgt0_32(int32_t dst) +{ + return cc_calc_ltgt_32(dst, 0); +} + +static uint32_t cc_calc_ltgt_64(int64_t src, int64_t dst) +{ + if (src == dst) { + return 0; + } else if (src < dst) { + return 1; + } else { + return 2; + } +} + +static uint32_t cc_calc_ltgt0_64(int64_t dst) +{ + return cc_calc_ltgt_64(dst, 0); +} + +static uint32_t cc_calc_ltugtu_32(uint32_t src, uint32_t dst) +{ + if (src == dst) { + return 0; + } else if (src < dst) { + return 1; + } else { + return 2; + } +} + +static uint32_t cc_calc_ltugtu_64(uint64_t src, uint64_t dst) +{ + if (src == dst) { + return 0; + } else if (src < dst) { + return 1; + } else { + return 2; + } +} + +static uint32_t cc_calc_tm_32(uint32_t val, uint32_t mask) +{ + uint32_t r = val & mask; + + if (r == 0) { + return 0; + } else if (r == mask) { + return 3; + } else { + return 1; + } +} + +static uint32_t cc_calc_tm_64(uint64_t val, uint64_t mask) +{ + uint64_t r = val & mask; + + if (r == 0) { + return 0; + } else if (r == mask) { + return 3; + } else { + int top = clz64(mask); + if ((int64_t)(val << top) < 0) { + return 2; + } else { + return 1; + } + } +} + +static uint32_t cc_calc_nz(uint64_t dst) +{ + return !!dst; +} + +static uint32_t cc_calc_addu(uint64_t carry_out, uint64_t result) +{ + g_assert(carry_out <= 1); + return (result != 0) + 2 * carry_out; +} + +static uint32_t cc_calc_subu(uint64_t borrow_out, uint64_t result) +{ + return cc_calc_addu(borrow_out + 1, result); +} + +static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar) +{ + if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) { + return 3; /* overflow */ + } else { + if (ar < 0) { + return 1; + } else if (ar > 0) { + return 2; + } else { + return 0; + } + } +} + +static uint32_t cc_calc_sub_64(int64_t a1, int64_t a2, int64_t ar) +{ + if ((a1 > 0 && a2 < 0 && ar < 0) || (a1 < 0 && a2 > 0 && ar > 0)) { + return 3; /* overflow */ + } else { + if (ar < 0) { + return 1; + } else if (ar > 0) { + return 2; + } else { + return 0; + } + } +} + +static uint32_t 
cc_calc_abs_64(int64_t dst) +{ + if ((uint64_t)dst == 0x8000000000000000ULL) { + return 3; + } else if (dst) { + return 2; + } else { + return 0; + } +} + +static uint32_t cc_calc_nabs_64(int64_t dst) +{ + return !!dst; +} + +static uint32_t cc_calc_comp_64(int64_t dst) +{ + if ((uint64_t)dst == 0x8000000000000000ULL) { + return 3; + } else if (dst < 0) { + return 1; + } else if (dst > 0) { + return 2; + } else { + return 0; + } +} + + +static uint32_t cc_calc_add_32(int32_t a1, int32_t a2, int32_t ar) +{ + if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) { + return 3; /* overflow */ + } else { + if (ar < 0) { + return 1; + } else if (ar > 0) { + return 2; + } else { + return 0; + } + } +} + +static uint32_t cc_calc_sub_32(int32_t a1, int32_t a2, int32_t ar) +{ + if ((a1 > 0 && a2 < 0 && ar < 0) || (a1 < 0 && a2 > 0 && ar > 0)) { + return 3; /* overflow */ + } else { + if (ar < 0) { + return 1; + } else if (ar > 0) { + return 2; + } else { + return 0; + } + } +} + +static uint32_t cc_calc_abs_32(int32_t dst) +{ + if ((uint32_t)dst == 0x80000000UL) { + return 3; + } else if (dst) { + return 2; + } else { + return 0; + } +} + +static uint32_t cc_calc_nabs_32(int32_t dst) +{ + return !!dst; +} + +static uint32_t cc_calc_comp_32(int32_t dst) +{ + if ((uint32_t)dst == 0x80000000UL) { + return 3; + } else if (dst < 0) { + return 1; + } else if (dst > 0) { + return 2; + } else { + return 0; + } +} + +/* calculate condition code for insert character under mask insn */ +static uint32_t cc_calc_icm(uint64_t mask, uint64_t val) +{ + if ((val & mask) == 0) { + return 0; + } else { + int top = clz64(mask); + if ((int64_t)(val << top) < 0) { + return 1; + } else { + return 2; + } + } +} + +static uint32_t cc_calc_sla_32(uint32_t src, int shift) +{ + uint32_t mask = ((1U << shift) - 1U) << (32 - shift); + uint32_t sign = 1U << 31; + uint32_t match; + int32_t r; + + /* Check if the sign bit stays the same. */ + if (src & sign) { + match = mask; + } else { + match = 0; + } + if ((src & mask) != match) { + /* Overflow. */ + return 3; + } + + r = ((src << shift) & ~sign) | (src & sign); + if (r == 0) { + return 0; + } else if (r < 0) { + return 1; + } + return 2; +} + +static uint32_t cc_calc_sla_64(uint64_t src, int shift) +{ + uint64_t mask = ((1ULL << shift) - 1ULL) << (64 - shift); + uint64_t sign = 1ULL << 63; + uint64_t match; + int64_t r; + + /* Check if the sign bit stays the same. */ + if (src & sign) { + match = mask; + } else { + match = 0; + } + if ((src & mask) != match) { + /* Overflow. */ + return 3; + } + + r = ((src << shift) & ~sign) | (src & sign); + if (r == 0) { + return 0; + } else if (r < 0) { + return 1; + } + return 2; +} + +static uint32_t cc_calc_flogr(uint64_t dst) +{ + return dst ? 2 : 0; +} + +static uint32_t cc_calc_lcbb(uint64_t dst) +{ + return dst == 16 ? 
0 : 3; +} + +static uint32_t cc_calc_vc(uint64_t low, uint64_t high) +{ + if (high == -1ull && low == -1ull) { + /* all elements match */ + return 0; + } else if (high == 0 && low == 0) { + /* no elements match */ + return 3; + } else { + /* some elements but not all match */ + return 1; + } +} + +static uint32_t cc_calc_muls_32(int64_t res) +{ + const int64_t tmp = res >> 31; + + if (!res) { + return 0; + } else if (tmp && tmp != -1) { + return 3; + } else if (res < 0) { + return 1; + } + return 2; +} + +static uint64_t cc_calc_muls_64(int64_t res_high, uint64_t res_low) +{ + if (!res_high && !res_low) { + return 0; + } else if (res_high + (res_low >> 63) != 0) { + return 3; + } else if (res_high < 0) { + return 1; + } + return 2; +} + +static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op, + uint64_t src, uint64_t dst, uint64_t vr) +{ + uint32_t r = 0; + + switch (cc_op) { + case CC_OP_CONST0: + case CC_OP_CONST1: + case CC_OP_CONST2: + case CC_OP_CONST3: + /* cc_op value _is_ cc */ + r = cc_op; + break; + case CC_OP_LTGT0_32: + r = cc_calc_ltgt0_32(dst); + break; + case CC_OP_LTGT0_64: + r = cc_calc_ltgt0_64(dst); + break; + case CC_OP_LTGT_32: + r = cc_calc_ltgt_32(src, dst); + break; + case CC_OP_LTGT_64: + r = cc_calc_ltgt_64(src, dst); + break; + case CC_OP_LTUGTU_32: + r = cc_calc_ltugtu_32(src, dst); + break; + case CC_OP_LTUGTU_64: + r = cc_calc_ltugtu_64(src, dst); + break; + case CC_OP_TM_32: + r = cc_calc_tm_32(src, dst); + break; + case CC_OP_TM_64: + r = cc_calc_tm_64(src, dst); + break; + case CC_OP_NZ: + r = cc_calc_nz(dst); + break; + case CC_OP_ADDU: + r = cc_calc_addu(src, dst); + break; + case CC_OP_SUBU: + r = cc_calc_subu(src, dst); + break; + case CC_OP_ADD_64: + r = cc_calc_add_64(src, dst, vr); + break; + case CC_OP_SUB_64: + r = cc_calc_sub_64(src, dst, vr); + break; + case CC_OP_ABS_64: + r = cc_calc_abs_64(dst); + break; + case CC_OP_NABS_64: + r = cc_calc_nabs_64(dst); + break; + case CC_OP_COMP_64: + r = cc_calc_comp_64(dst); + break; + case CC_OP_MULS_64: + r = cc_calc_muls_64(src, dst); + break; + + case CC_OP_ADD_32: + r = cc_calc_add_32(src, dst, vr); + break; + case CC_OP_SUB_32: + r = cc_calc_sub_32(src, dst, vr); + break; + case CC_OP_ABS_32: + r = cc_calc_abs_32(dst); + break; + case CC_OP_NABS_32: + r = cc_calc_nabs_32(dst); + break; + case CC_OP_COMP_32: + r = cc_calc_comp_32(dst); + break; + case CC_OP_MULS_32: + r = cc_calc_muls_32(dst); + break; + + case CC_OP_ICM: + r = cc_calc_icm(src, dst); + break; + case CC_OP_SLA_32: + r = cc_calc_sla_32(src, dst); + break; + case CC_OP_SLA_64: + r = cc_calc_sla_64(src, dst); + break; + case CC_OP_FLOGR: + r = cc_calc_flogr(dst); + break; + case CC_OP_LCBB: + r = cc_calc_lcbb(dst); + break; + case CC_OP_VC: + r = cc_calc_vc(src, dst); + break; + + case CC_OP_NZ_F32: + r = set_cc_nz_f32(dst); + break; + case CC_OP_NZ_F64: + r = set_cc_nz_f64(dst); + break; + case CC_OP_NZ_F128: + r = set_cc_nz_f128(make_float128(src, dst)); + break; + + default: + cpu_abort(env_cpu(env), "Unknown CC operation: %s\n", cc_name(cc_op)); + } + + HELPER_LOG("%s: %15s 0x%016lx 0x%016lx 0x%016lx = %d\n", __func__, + cc_name(cc_op), src, dst, vr, r); + return r; +} + +uint32_t calc_cc(CPUS390XState *env, uint32_t cc_op, uint64_t src, uint64_t dst, + uint64_t vr) +{ + return do_calc_cc(env, cc_op, src, dst, vr); +} + +uint32_t HELPER(calc_cc)(CPUS390XState *env, uint32_t cc_op, uint64_t src, + uint64_t dst, uint64_t vr) +{ + return do_calc_cc(env, cc_op, src, dst, vr); +} + +#ifndef CONFIG_USER_ONLY +void 
HELPER(load_psw)(CPUS390XState *env, uint64_t mask, uint64_t addr) +{ + s390_cpu_set_psw(env, mask, addr); + cpu_loop_exit(env_cpu(env)); +} + +void HELPER(sacf)(CPUS390XState *env, uint64_t a1) +{ + HELPER_LOG("%s: %16" PRIx64 "\n", __func__, a1); + + switch (a1 & 0xf00) { + case 0x000: + env->psw.mask &= ~PSW_MASK_ASC; + env->psw.mask |= PSW_ASC_PRIMARY; + break; + case 0x100: + env->psw.mask &= ~PSW_MASK_ASC; + env->psw.mask |= PSW_ASC_SECONDARY; + break; + case 0x300: + env->psw.mask &= ~PSW_MASK_ASC; + env->psw.mask |= PSW_ASC_HOME; + break; + default: + HELPER_LOG("unknown sacf mode: %" PRIx64 "\n", a1); + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC()); + } +} +#endif diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c new file mode 100644 index 0000000000..138d9e7ad9 --- /dev/null +++ b/target/s390x/tcg/crypto_helper.c @@ -0,0 +1,61 @@ +/* + * s390x crypto helpers + * + * Copyright (c) 2017 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "s390x-internal.h" +#include "tcg_s390x.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" + +uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3, + uint32_t type) +{ + const uintptr_t ra = GETPC(); + const uint8_t mod = env->regs[0] & 0x80ULL; + const uint8_t fc = env->regs[0] & 0x7fULL; + uint8_t subfunc[16] = { 0 }; + uint64_t param_addr; + int i; + + switch (type) { + case S390_FEAT_TYPE_KMAC: + case S390_FEAT_TYPE_KIMD: + case S390_FEAT_TYPE_KLMD: + case S390_FEAT_TYPE_PCKMO: + case S390_FEAT_TYPE_PCC: + if (mod) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + break; + } + + s390_get_feat_block(type, subfunc); + if (!test_be_bit(fc, subfunc)) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + switch (fc) { + case 0: /* query subfunction */ + for (i = 0; i < 16; i++) { + param_addr = wrap_address(env, env->regs[1] + i); + cpu_stb_data_ra(env, param_addr, subfunc[i], ra); + } + break; + default: + /* we don't implement any other subfunction yet */ + g_assert_not_reached(); + } + + return 0; +} diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c new file mode 100644 index 0000000000..a61917d04f --- /dev/null +++ b/target/s390x/tcg/excp_helper.c @@ -0,0 +1,641 @@ +/* + * s390x exception / interrupt helpers + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2011 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "exec/helper-proto.h" +#include "qemu/timer.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "hw/s390x/ioinst.h" +#include "exec/address-spaces.h" +#include "tcg_s390x.h" +#ifndef CONFIG_USER_ONLY +#include "hw/s390x/s390_flic.h" +#include "hw/boards.h" +#endif + +void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, + uint32_t code, uintptr_t ra) +{ + CPUState *cs = env_cpu(env); + + cpu_restore_state(cs, ra, true); + qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n", + env->psw.addr); + trigger_pgm_exception(env, code); + cpu_loop_exit(cs); +} + +void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc, + uintptr_t ra) +{ + g_assert(dxc <= 0xff); +#if !defined(CONFIG_USER_ONLY) + /* Store the DXC into the lowcore */ + stl_phys(env_cpu(env)->as, + env->psa + offsetof(LowCore, data_exc_code), dxc); +#endif + + /* Store the DXC into the FPC if AFP is enabled */ + if (env->cregs[0] & CR0_AFP) { + env->fpc = deposit32(env->fpc, 8, 8, dxc); + } + tcg_s390_program_interrupt(env, PGM_DATA, ra); +} + +void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc, + uintptr_t ra) +{ + g_assert(vxc <= 0xff); +#if !defined(CONFIG_USER_ONLY) + /* Always store the VXC into the lowcore, without AFP it is undefined */ + stl_phys(env_cpu(env)->as, + env->psa + offsetof(LowCore, data_exc_code), vxc); +#endif + + /* Always store the VXC into the FPC, without AFP it is undefined */ + env->fpc = deposit32(env->fpc, 8, 8, vxc); + tcg_s390_program_interrupt(env, PGM_VECTOR_PROCESSING, ra); +} + +void HELPER(data_exception)(CPUS390XState *env, uint32_t dxc) +{ + tcg_s390_data_exception(env, dxc, GETPC()); +} + +#if defined(CONFIG_USER_ONLY) + +void s390_cpu_do_interrupt(CPUState *cs) +{ + cs->exception_index = -1; +} + +bool s390_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr) +{ + S390CPU *cpu = S390_CPU(cs); + + trigger_pgm_exception(&cpu->env, PGM_ADDRESSING); + /* On real machines this value is dropped into LowMem. Since this + is userland, simply put this someplace that cpu_loop can find it. 
*/ + cpu->env.__excp_addr = address; + cpu_loop_exit_restore(cs, retaddr); +} + +#else /* !CONFIG_USER_ONLY */ + +static inline uint64_t cpu_mmu_idx_to_asc(int mmu_idx) +{ + switch (mmu_idx) { + case MMU_PRIMARY_IDX: + return PSW_ASC_PRIMARY; + case MMU_SECONDARY_IDX: + return PSW_ASC_SECONDARY; + case MMU_HOME_IDX: + return PSW_ASC_HOME; + default: + abort(); + } +} + +bool s390_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr) +{ + S390CPU *cpu = S390_CPU(cs); + CPUS390XState *env = &cpu->env; + target_ulong vaddr, raddr; + uint64_t asc, tec; + int prot, excp; + + qemu_log_mask(CPU_LOG_MMU, "%s: addr 0x%" VADDR_PRIx " rw %d mmu_idx %d\n", + __func__, address, access_type, mmu_idx); + + vaddr = address; + + if (mmu_idx < MMU_REAL_IDX) { + asc = cpu_mmu_idx_to_asc(mmu_idx); + /* 31-Bit mode */ + if (!(env->psw.mask & PSW_MASK_64)) { + vaddr &= 0x7fffffff; + } + excp = mmu_translate(env, vaddr, access_type, asc, &raddr, &prot, &tec); + } else if (mmu_idx == MMU_REAL_IDX) { + /* 31-Bit mode */ + if (!(env->psw.mask & PSW_MASK_64)) { + vaddr &= 0x7fffffff; + } + excp = mmu_translate_real(env, vaddr, access_type, &raddr, &prot, &tec); + } else { + g_assert_not_reached(); + } + + /* check out of RAM access */ + if (!excp && + !address_space_access_valid(&address_space_memory, raddr, + TARGET_PAGE_SIZE, access_type, + MEMTXATTRS_UNSPECIFIED)) { + MachineState *ms = MACHINE(qdev_get_machine()); + qemu_log_mask(CPU_LOG_MMU, + "%s: raddr %" PRIx64 " > ram_size %" PRIx64 "\n", + __func__, (uint64_t)raddr, (uint64_t)ms->ram_size); + excp = PGM_ADDRESSING; + tec = 0; /* unused */ + } + + env->tlb_fill_exc = excp; + env->tlb_fill_tec = tec; + + if (!excp) { + qemu_log_mask(CPU_LOG_MMU, + "%s: set tlb %" PRIx64 " -> %" PRIx64 " (%x)\n", + __func__, (uint64_t)vaddr, (uint64_t)raddr, prot); + tlb_set_page(cs, address & TARGET_PAGE_MASK, raddr, prot, + mmu_idx, TARGET_PAGE_SIZE); + return true; + } + if (probe) { + return false; + } + + if (excp != PGM_ADDRESSING) { + stq_phys(env_cpu(env)->as, + env->psa + offsetof(LowCore, trans_exc_code), tec); + } + + /* + * For data accesses, ILEN will be filled in from the unwind info, + * within cpu_loop_exit_restore. For code accesses, retaddr == 0, + * and so unwinding will not occur. However, ILEN is also undefined + * for that case -- we choose to set ILEN = 2. 
+ */ + env->int_pgm_ilen = 2; + trigger_pgm_exception(env, excp); + cpu_loop_exit_restore(cs, retaddr); +} + +static void do_program_interrupt(CPUS390XState *env) +{ + uint64_t mask, addr; + LowCore *lowcore; + int ilen = env->int_pgm_ilen; + + assert(ilen == 2 || ilen == 4 || ilen == 6); + + switch (env->int_pgm_code) { + case PGM_PER: + if (env->per_perc_atmid & PER_CODE_EVENT_NULLIFICATION) { + break; + } + /* FALL THROUGH */ + case PGM_OPERATION: + case PGM_PRIVILEGED: + case PGM_EXECUTE: + case PGM_PROTECTION: + case PGM_ADDRESSING: + case PGM_SPECIFICATION: + case PGM_DATA: + case PGM_FIXPT_OVERFLOW: + case PGM_FIXPT_DIVIDE: + case PGM_DEC_OVERFLOW: + case PGM_DEC_DIVIDE: + case PGM_HFP_EXP_OVERFLOW: + case PGM_HFP_EXP_UNDERFLOW: + case PGM_HFP_SIGNIFICANCE: + case PGM_HFP_DIVIDE: + case PGM_TRANS_SPEC: + case PGM_SPECIAL_OP: + case PGM_OPERAND: + case PGM_HFP_SQRT: + case PGM_PC_TRANS_SPEC: + case PGM_ALET_SPEC: + case PGM_MONITOR: + /* advance the PSW if our exception is not nullifying */ + env->psw.addr += ilen; + break; + } + + qemu_log_mask(CPU_LOG_INT, + "%s: code=0x%x ilen=%d psw: %" PRIx64 " %" PRIx64 "\n", + __func__, env->int_pgm_code, ilen, env->psw.mask, + env->psw.addr); + + lowcore = cpu_map_lowcore(env); + + /* Signal PER events with the exception. */ + if (env->per_perc_atmid) { + env->int_pgm_code |= PGM_PER; + lowcore->per_address = cpu_to_be64(env->per_address); + lowcore->per_perc_atmid = cpu_to_be16(env->per_perc_atmid); + env->per_perc_atmid = 0; + } + + lowcore->pgm_ilen = cpu_to_be16(ilen); + lowcore->pgm_code = cpu_to_be16(env->int_pgm_code); + lowcore->program_old_psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(env)); + lowcore->program_old_psw.addr = cpu_to_be64(env->psw.addr); + mask = be64_to_cpu(lowcore->program_new_psw.mask); + addr = be64_to_cpu(lowcore->program_new_psw.addr); + lowcore->per_breaking_event_addr = cpu_to_be64(env->gbea); + + cpu_unmap_lowcore(lowcore); + + s390_cpu_set_psw(env, mask, addr); +} + +static void do_svc_interrupt(CPUS390XState *env) +{ + uint64_t mask, addr; + LowCore *lowcore; + + lowcore = cpu_map_lowcore(env); + + lowcore->svc_code = cpu_to_be16(env->int_svc_code); + lowcore->svc_ilen = cpu_to_be16(env->int_svc_ilen); + lowcore->svc_old_psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(env)); + lowcore->svc_old_psw.addr = cpu_to_be64(env->psw.addr + env->int_svc_ilen); + mask = be64_to_cpu(lowcore->svc_new_psw.mask); + addr = be64_to_cpu(lowcore->svc_new_psw.addr); + + cpu_unmap_lowcore(lowcore); + + s390_cpu_set_psw(env, mask, addr); + + /* When a PER event is pending, the PER exception has to happen + immediately after the SERVICE CALL one. 
*/ + if (env->per_perc_atmid) { + env->int_pgm_code = PGM_PER; + env->int_pgm_ilen = env->int_svc_ilen; + do_program_interrupt(env); + } +} + +#define VIRTIO_SUBCODE_64 0x0D00 + +static void do_ext_interrupt(CPUS390XState *env) +{ + QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic()); + S390CPU *cpu = env_archcpu(env); + uint64_t mask, addr; + uint16_t cpu_addr; + LowCore *lowcore; + + if (!(env->psw.mask & PSW_MASK_EXT)) { + cpu_abort(CPU(cpu), "Ext int w/o ext mask\n"); + } + + lowcore = cpu_map_lowcore(env); + + if ((env->pending_int & INTERRUPT_EMERGENCY_SIGNAL) && + (env->cregs[0] & CR0_EMERGENCY_SIGNAL_SC)) { + MachineState *ms = MACHINE(qdev_get_machine()); + unsigned int max_cpus = ms->smp.max_cpus; + + lowcore->ext_int_code = cpu_to_be16(EXT_EMERGENCY); + cpu_addr = find_first_bit(env->emergency_signals, S390_MAX_CPUS); + g_assert(cpu_addr < S390_MAX_CPUS); + lowcore->cpu_addr = cpu_to_be16(cpu_addr); + clear_bit(cpu_addr, env->emergency_signals); + if (bitmap_empty(env->emergency_signals, max_cpus)) { + env->pending_int &= ~INTERRUPT_EMERGENCY_SIGNAL; + } + } else if ((env->pending_int & INTERRUPT_EXTERNAL_CALL) && + (env->cregs[0] & CR0_EXTERNAL_CALL_SC)) { + lowcore->ext_int_code = cpu_to_be16(EXT_EXTERNAL_CALL); + lowcore->cpu_addr = cpu_to_be16(env->external_call_addr); + env->pending_int &= ~INTERRUPT_EXTERNAL_CALL; + } else if ((env->pending_int & INTERRUPT_EXT_CLOCK_COMPARATOR) && + (env->cregs[0] & CR0_CKC_SC)) { + lowcore->ext_int_code = cpu_to_be16(EXT_CLOCK_COMP); + lowcore->cpu_addr = 0; + env->pending_int &= ~INTERRUPT_EXT_CLOCK_COMPARATOR; + } else if ((env->pending_int & INTERRUPT_EXT_CPU_TIMER) && + (env->cregs[0] & CR0_CPU_TIMER_SC)) { + lowcore->ext_int_code = cpu_to_be16(EXT_CPU_TIMER); + lowcore->cpu_addr = 0; + env->pending_int &= ~INTERRUPT_EXT_CPU_TIMER; + } else if (qemu_s390_flic_has_service(flic) && + (env->cregs[0] & CR0_SERVICE_SC)) { + uint32_t param; + + param = qemu_s390_flic_dequeue_service(flic); + lowcore->ext_int_code = cpu_to_be16(EXT_SERVICE); + lowcore->ext_params = cpu_to_be32(param); + lowcore->cpu_addr = 0; + } else { + g_assert_not_reached(); + } + + mask = be64_to_cpu(lowcore->external_new_psw.mask); + addr = be64_to_cpu(lowcore->external_new_psw.addr); + lowcore->external_old_psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(env)); + lowcore->external_old_psw.addr = cpu_to_be64(env->psw.addr); + + cpu_unmap_lowcore(lowcore); + + s390_cpu_set_psw(env, mask, addr); +} + +static void do_io_interrupt(CPUS390XState *env) +{ + QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic()); + uint64_t mask, addr; + QEMUS390FlicIO *io; + LowCore *lowcore; + + g_assert(env->psw.mask & PSW_MASK_IO); + io = qemu_s390_flic_dequeue_io(flic, env->cregs[6]); + g_assert(io); + + lowcore = cpu_map_lowcore(env); + + lowcore->subchannel_id = cpu_to_be16(io->id); + lowcore->subchannel_nr = cpu_to_be16(io->nr); + lowcore->io_int_parm = cpu_to_be32(io->parm); + lowcore->io_int_word = cpu_to_be32(io->word); + lowcore->io_old_psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(env)); + lowcore->io_old_psw.addr = cpu_to_be64(env->psw.addr); + mask = be64_to_cpu(lowcore->io_new_psw.mask); + addr = be64_to_cpu(lowcore->io_new_psw.addr); + + cpu_unmap_lowcore(lowcore); + g_free(io); + + s390_cpu_set_psw(env, mask, addr); +} + +typedef struct MchkExtSaveArea { + uint64_t vregs[32][2]; /* 0x0000 */ + uint8_t pad_0x0200[0x0400 - 0x0200]; /* 0x0200 */ +} MchkExtSaveArea; +QEMU_BUILD_BUG_ON(sizeof(MchkExtSaveArea) != 1024); + +static int mchk_store_vregs(CPUS390XState *env, 
uint64_t mcesao) +{ + hwaddr len = sizeof(MchkExtSaveArea); + MchkExtSaveArea *sa; + int i; + + sa = cpu_physical_memory_map(mcesao, &len, true); + if (!sa) { + return -EFAULT; + } + if (len != sizeof(MchkExtSaveArea)) { + cpu_physical_memory_unmap(sa, len, 1, 0); + return -EFAULT; + } + + for (i = 0; i < 32; i++) { + sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0]); + sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1]); + } + + cpu_physical_memory_unmap(sa, len, 1, len); + return 0; +} + +static void do_mchk_interrupt(CPUS390XState *env) +{ + QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic()); + uint64_t mcic = s390_build_validity_mcic() | MCIC_SC_CP; + uint64_t mask, addr, mcesao = 0; + LowCore *lowcore; + int i; + + /* for now we only support channel report machine checks (floating) */ + g_assert(env->psw.mask & PSW_MASK_MCHECK); + g_assert(env->cregs[14] & CR14_CHANNEL_REPORT_SC); + + qemu_s390_flic_dequeue_crw_mchk(flic); + + lowcore = cpu_map_lowcore(env); + + /* extended save area */ + if (mcic & MCIC_VB_VR) { + /* length and alignment is 1024 bytes */ + mcesao = be64_to_cpu(lowcore->mcesad) & ~0x3ffull; + } + + /* try to store vector registers */ + if (!mcesao || mchk_store_vregs(env, mcesao)) { + mcic &= ~MCIC_VB_VR; + } + + /* we are always in z/Architecture mode */ + lowcore->ar_access_id = 1; + + for (i = 0; i < 16; i++) { + lowcore->floating_pt_save_area[i] = cpu_to_be64(*get_freg(env, i)); + lowcore->gpregs_save_area[i] = cpu_to_be64(env->regs[i]); + lowcore->access_regs_save_area[i] = cpu_to_be32(env->aregs[i]); + lowcore->cregs_save_area[i] = cpu_to_be64(env->cregs[i]); + } + lowcore->prefixreg_save_area = cpu_to_be32(env->psa); + lowcore->fpt_creg_save_area = cpu_to_be32(env->fpc); + lowcore->tod_progreg_save_area = cpu_to_be32(env->todpr); + lowcore->cpu_timer_save_area = cpu_to_be64(env->cputm); + lowcore->clock_comp_save_area = cpu_to_be64(env->ckc >> 8); + + lowcore->mcic = cpu_to_be64(mcic); + lowcore->mcck_old_psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(env)); + lowcore->mcck_old_psw.addr = cpu_to_be64(env->psw.addr); + mask = be64_to_cpu(lowcore->mcck_new_psw.mask); + addr = be64_to_cpu(lowcore->mcck_new_psw.addr); + + cpu_unmap_lowcore(lowcore); + + s390_cpu_set_psw(env, mask, addr); +} + +void s390_cpu_do_interrupt(CPUState *cs) +{ + QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic()); + S390CPU *cpu = S390_CPU(cs); + CPUS390XState *env = &cpu->env; + bool stopped = false; + + qemu_log_mask(CPU_LOG_INT, "%s: %d at psw=%" PRIx64 ":%" PRIx64 "\n", + __func__, cs->exception_index, env->psw.mask, env->psw.addr); + +try_deliver: + /* handle machine checks */ + if (cs->exception_index == -1 && s390_cpu_has_mcck_int(cpu)) { + cs->exception_index = EXCP_MCHK; + } + /* handle external interrupts */ + if (cs->exception_index == -1 && s390_cpu_has_ext_int(cpu)) { + cs->exception_index = EXCP_EXT; + } + /* handle I/O interrupts */ + if (cs->exception_index == -1 && s390_cpu_has_io_int(cpu)) { + cs->exception_index = EXCP_IO; + } + /* RESTART interrupt */ + if (cs->exception_index == -1 && s390_cpu_has_restart_int(cpu)) { + cs->exception_index = EXCP_RESTART; + } + /* STOP interrupt has least priority */ + if (cs->exception_index == -1 && s390_cpu_has_stop_int(cpu)) { + cs->exception_index = EXCP_STOP; + } + + switch (cs->exception_index) { + case EXCP_PGM: + do_program_interrupt(env); + break; + case EXCP_SVC: + do_svc_interrupt(env); + break; + case EXCP_EXT: + do_ext_interrupt(env); + break; + case EXCP_IO: + do_io_interrupt(env); + break; + case EXCP_MCHK: + 
do_mchk_interrupt(env); + break; + case EXCP_RESTART: + do_restart_interrupt(env); + break; + case EXCP_STOP: + do_stop_interrupt(env); + stopped = true; + break; + } + + if (cs->exception_index != -1 && !stopped) { + /* check if there are more pending interrupts to deliver */ + cs->exception_index = -1; + goto try_deliver; + } + cs->exception_index = -1; + + /* we might still have pending interrupts, but not deliverable */ + if (!env->pending_int && !qemu_s390_flic_has_any(flic)) { + cs->interrupt_request &= ~CPU_INTERRUPT_HARD; + } + + /* WAIT PSW during interrupt injection or STOP interrupt */ + if ((env->psw.mask & PSW_MASK_WAIT) || stopped) { + /* don't trigger a cpu_loop_exit(), use an interrupt instead */ + cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HALT); + } else if (cs->halted) { + /* unhalt if we had a WAIT PSW somewhere in our injection chain */ + s390_cpu_unhalt(cpu); + } +} + +bool s390_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + if (interrupt_request & CPU_INTERRUPT_HARD) { + S390CPU *cpu = S390_CPU(cs); + CPUS390XState *env = &cpu->env; + + if (env->ex_value) { + /* Execution of the target insn is indivisible from + the parent EXECUTE insn. */ + return false; + } + if (s390_cpu_has_int(cpu)) { + s390_cpu_do_interrupt(cs); + return true; + } + if (env->psw.mask & PSW_MASK_WAIT) { + /* Woken up because of a floating interrupt but it has already + * been delivered. Go back to sleep. */ + cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HALT); + } + } + return false; +} + +void s390x_cpu_debug_excp_handler(CPUState *cs) +{ + S390CPU *cpu = S390_CPU(cs); + CPUS390XState *env = &cpu->env; + CPUWatchpoint *wp_hit = cs->watchpoint_hit; + + if (wp_hit && wp_hit->flags & BP_CPU) { + /* FIXME: When the storage-alteration-space control bit is set, + the exception should only be triggered if the memory access + is done using an address space with the storage-alteration-event + bit set. We have no way to detect that with the current + watchpoint code. */ + cs->watchpoint_hit = NULL; + + env->per_address = env->psw.addr; + env->per_perc_atmid |= PER_CODE_EVENT_STORE | get_per_atmid(env); + /* FIXME: We currently have no way to detect the address space used + to trigger the watchpoint. For now just consider it is the + current default ASC. This turns out to be true except when the MVCP + and MVCS instructions are used. */ + env->per_perc_atmid |= env->psw.mask & (PSW_MASK_ASC) >> 46; + + /* + * Remove all watchpoints to re-execute the code. A PER exception + * will be triggered, it will call s390_cpu_set_psw which will + * recompute the watchpoints. + */ + cpu_watchpoint_remove_all(cs, BP_CPU); + cpu_loop_exit_noexc(cs); + } +} + +/* Unaligned accesses are only diagnosed with MO_ALIGN. At the moment, + this is only for the atomic operations, for which we want to raise a + specification exception.
*/ +void s390x_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + S390CPU *cpu = S390_CPU(cs); + CPUS390XState *env = &cpu->env; + + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, retaddr); +} + +static void QEMU_NORETURN monitor_event(CPUS390XState *env, + uint64_t monitor_code, + uint8_t monitor_class, uintptr_t ra) +{ + /* Store the Monitor Code and the Monitor Class Number into the lowcore */ + stq_phys(env_cpu(env)->as, + env->psa + offsetof(LowCore, monitor_code), monitor_code); + stw_phys(env_cpu(env)->as, + env->psa + offsetof(LowCore, mon_class_num), monitor_class); + + tcg_s390_program_interrupt(env, PGM_MONITOR, ra); +} + +void HELPER(monitor_call)(CPUS390XState *env, uint64_t monitor_code, + uint32_t monitor_class) +{ + g_assert(monitor_class <= 0xff); + + if (env->cregs[8] & (0x8000 >> monitor_class)) { + monitor_event(env, monitor_code, monitor_class, GETPC()); + } +} + +#endif /* !CONFIG_USER_ONLY */ diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c new file mode 100644 index 0000000000..4067205405 --- /dev/null +++ b/target/s390x/tcg/fpu_helper.c @@ -0,0 +1,976 @@ +/* + * S/390 FPU helper routines + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2009 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "tcg_s390x.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" +#include "fpu/softfloat.h" + +/* #define DEBUG_HELPER */ +#ifdef DEBUG_HELPER +#define HELPER_LOG(x...) qemu_log(x) +#else +#define HELPER_LOG(x...) +#endif + +#define RET128(F) (env->retxl = F.low, F.high) + +uint8_t s390_softfloat_exc_to_ieee(unsigned int exc) +{ + uint8_t s390_exc = 0; + + s390_exc |= (exc & float_flag_invalid) ? S390_IEEE_MASK_INVALID : 0; + s390_exc |= (exc & float_flag_divbyzero) ? S390_IEEE_MASK_DIVBYZERO : 0; + s390_exc |= (exc & float_flag_overflow) ? S390_IEEE_MASK_OVERFLOW : 0; + s390_exc |= (exc & float_flag_underflow) ? S390_IEEE_MASK_UNDERFLOW : 0; + s390_exc |= (exc & float_flag_inexact) ? S390_IEEE_MASK_INEXACT : 0; + + return s390_exc; +} + +/* Should be called after any operation that may raise IEEE exceptions. */ +static void handle_exceptions(CPUS390XState *env, bool XxC, uintptr_t retaddr) +{ + unsigned s390_exc, qemu_exc; + + /* Get the exceptions raised by the current operation. Reset the + fpu_status contents so that the next operation has a clean slate. 
*/ + qemu_exc = env->fpu_status.float_exception_flags; + if (qemu_exc == 0) { + return; + } + env->fpu_status.float_exception_flags = 0; + s390_exc = s390_softfloat_exc_to_ieee(qemu_exc); + + /* + * IEEE-Underflow exception recognition exists if a tininess condition + * (underflow) exists and + * - The mask bit in the FPC is zero and the result is inexact + * - The mask bit in the FPC is one + * So tininess conditions that are not inexact don't trigger any + * underflow action in case the mask bit is not one. + */ + if (!(s390_exc & S390_IEEE_MASK_INEXACT) && + !((env->fpc >> 24) & S390_IEEE_MASK_UNDERFLOW)) { + s390_exc &= ~S390_IEEE_MASK_UNDERFLOW; + } + + /* + * FIXME: + * 1. Right now, all inexact conditions are indicated as + * "truncated" (0) and never as "incremented" (1) in the DXC. + * 2. Only traps due to invalid/divbyzero are suppressing. Other traps + * are completing, meaning the target register has to be written! + * This, however, will mean that we have to write the register before + * triggering the trap - impossible right now. + */ + + /* + * invalid/divbyzero cannot coexist with other conditions. + * overflow/underflow however can coexist with inexact, we have to + * handle it separately. + */ + if (s390_exc & ~S390_IEEE_MASK_INEXACT) { + if (s390_exc & ~S390_IEEE_MASK_INEXACT & env->fpc >> 24) { + /* trap condition - inexact reported along */ + tcg_s390_data_exception(env, s390_exc, retaddr); + } + /* nontrap condition - inexact handled differently */ + env->fpc |= (s390_exc & ~S390_IEEE_MASK_INEXACT) << 16; + } + + /* inexact handling */ + if (s390_exc & S390_IEEE_MASK_INEXACT && !XxC) { + /* trap condition - overflow/underflow _not_ reported along */ + if (s390_exc & S390_IEEE_MASK_INEXACT & env->fpc >> 24) { + tcg_s390_data_exception(env, s390_exc & S390_IEEE_MASK_INEXACT, + retaddr); + } + /* nontrap condition */ + env->fpc |= (s390_exc & S390_IEEE_MASK_INEXACT) << 16; + } +} + +int float_comp_to_cc(CPUS390XState *env, FloatRelation float_compare) +{ + switch (float_compare) { + case float_relation_equal: + return 0; + case float_relation_less: + return 1; + case float_relation_greater: + return 2; + case float_relation_unordered: + return 3; + default: + cpu_abort(env_cpu(env), "unknown return value for float compare\n"); + } +} + +/* condition codes for unary FP ops */ +uint32_t set_cc_nz_f32(float32 v) +{ + if (float32_is_any_nan(v)) { + return 3; + } else if (float32_is_zero(v)) { + return 0; + } else if (float32_is_neg(v)) { + return 1; + } else { + return 2; + } +} + +uint32_t set_cc_nz_f64(float64 v) +{ + if (float64_is_any_nan(v)) { + return 3; + } else if (float64_is_zero(v)) { + return 0; + } else if (float64_is_neg(v)) { + return 1; + } else { + return 2; + } +} + +uint32_t set_cc_nz_f128(float128 v) +{ + if (float128_is_any_nan(v)) { + return 3; + } else if (float128_is_zero(v)) { + return 0; + } else if (float128_is_neg(v)) { + return 1; + } else { + return 2; + } +} + +/* condition codes for FP to integer conversion ops */ +static uint32_t set_cc_conv_f32(float32 v, float_status *stat) +{ + if (stat->float_exception_flags & float_flag_invalid) { + return 3; + } else { + return set_cc_nz_f32(v); + } +} + +static uint32_t set_cc_conv_f64(float64 v, float_status *stat) +{ + if (stat->float_exception_flags & float_flag_invalid) { + return 3; + } else { + return set_cc_nz_f64(v); + } +} + +static uint32_t set_cc_conv_f128(float128 v, float_status *stat) +{ + if (stat->float_exception_flags & float_flag_invalid) { + return 3; + } else { + return
set_cc_nz_f128(v); + } +} + +static inline uint8_t round_from_m34(uint32_t m34) +{ + return extract32(m34, 0, 4); +} + +static inline bool xxc_from_m34(uint32_t m34) +{ + /* XxC is bit 1 of m4 */ + return extract32(m34, 4 + 3 - 1, 1); +} + +/* 32-bit FP addition */ +uint64_t HELPER(aeb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float32 ret = float32_add(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64-bit FP addition */ +uint64_t HELPER(adb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float64 ret = float64_add(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 128-bit FP addition */ +uint64_t HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t bh, uint64_t bl) +{ + float128 ret = float128_add(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* 32-bit FP subtraction */ +uint64_t HELPER(seb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float32 ret = float32_sub(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64-bit FP subtraction */ +uint64_t HELPER(sdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float64 ret = float64_sub(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 128-bit FP subtraction */ +uint64_t HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t bh, uint64_t bl) +{ + float128 ret = float128_sub(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* 32-bit FP division */ +uint64_t HELPER(deb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float32 ret = float32_div(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64-bit FP division */ +uint64_t HELPER(ddb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float64 ret = float64_div(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 128-bit FP division */ +uint64_t HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t bh, uint64_t bl) +{ + float128 ret = float128_div(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* 32-bit FP multiplication */ +uint64_t HELPER(meeb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float32 ret = float32_mul(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64-bit FP multiplication */ +uint64_t HELPER(mdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float64 ret = float64_mul(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64/32-bit FP multiplication */ +uint64_t HELPER(mdeb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float64 ret = float32_to_float64(f2, &env->fpu_status); + ret = float64_mul(f1, ret, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 128-bit FP multiplication */ +uint64_t HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t bh, uint64_t bl) +{ + float128 ret = float128_mul(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* 128/64-bit FP multiplication */ +uint64_t HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t f2) +{ + 
float128 ret = float64_to_float128(f2, &env->fpu_status); + ret = float128_mul(make_float128(ah, al), ret, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* convert 32-bit float to 64-bit float */ +uint64_t HELPER(ldeb)(CPUS390XState *env, uint64_t f2) +{ + float64 ret = float32_to_float64(f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* convert 128-bit float to 64-bit float */ +uint64_t HELPER(ldxb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float64 ret = float128_to_float64(make_float128(ah, al), &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* convert 64-bit float to 128-bit float */ +uint64_t HELPER(lxdb)(CPUS390XState *env, uint64_t f2) +{ + float128 ret = float64_to_float128(f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* convert 32-bit float to 128-bit float */ +uint64_t HELPER(lxeb)(CPUS390XState *env, uint64_t f2) +{ + float128 ret = float32_to_float128(f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* convert 64-bit float to 32-bit float */ +uint64_t HELPER(ledb)(CPUS390XState *env, uint64_t f2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float32 ret = float64_to_float32(f2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* convert 128-bit float to 32-bit float */ +uint64_t HELPER(lexb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float32 ret = float128_to_float32(make_float128(ah, al), &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* 32-bit FP compare */ +uint32_t HELPER(ceb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + FloatRelation cmp = float32_compare_quiet(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return float_comp_to_cc(env, cmp); +} + +/* 64-bit FP compare */ +uint32_t HELPER(cdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + FloatRelation cmp = float64_compare_quiet(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return float_comp_to_cc(env, cmp); +} + +/* 128-bit FP compare */ +uint32_t HELPER(cxb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t bh, uint64_t bl) +{ + FloatRelation cmp = float128_compare_quiet(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return float_comp_to_cc(env, cmp); +} + +int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3) +{ + int ret = env->fpu_status.float_rounding_mode; + + switch (m3) { + case 0: + /* current mode */ + break; + case 1: + /* round to nearest with ties away from 0 */ + set_float_rounding_mode(float_round_ties_away, &env->fpu_status); + break; + case 3: + /* round to prepare for shorter precision */ + set_float_rounding_mode(float_round_to_odd, &env->fpu_status); + break; + case 4: + /* round to nearest with ties to even */ + set_float_rounding_mode(float_round_nearest_even, &env->fpu_status); + break; + case 5: + /* round to zero */ + set_float_rounding_mode(float_round_to_zero, 
&env->fpu_status); + break; + case 6: + /* round to +inf */ + set_float_rounding_mode(float_round_up, &env->fpu_status); + break; + case 7: + /* round to -inf */ + set_float_rounding_mode(float_round_down, &env->fpu_status); + break; + default: + g_assert_not_reached(); + } + return ret; +} + +void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode) +{ + set_float_rounding_mode(old_mode, &env->fpu_status); +} + +/* convert 64-bit int to 32-bit float */ +uint64_t HELPER(cegb)(CPUS390XState *env, int64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float32 ret = int64_to_float32(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* convert 64-bit int to 64-bit float */ +uint64_t HELPER(cdgb)(CPUS390XState *env, int64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float64 ret = int64_to_float64(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* convert 64-bit int to 128-bit float */ +uint64_t HELPER(cxgb)(CPUS390XState *env, int64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 ret = int64_to_float128(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return RET128(ret); +} + +/* convert 64-bit uint to 32-bit float */ +uint64_t HELPER(celgb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float32 ret = uint64_to_float32(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* convert 64-bit uint to 64-bit float */ +uint64_t HELPER(cdlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float64 ret = uint64_to_float64(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* convert 64-bit uint to 128-bit float */ +uint64_t HELPER(cxlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 ret = uint64_to_float128(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return RET128(ret); +} + +/* convert 32-bit float to 64-bit int */ +uint64_t HELPER(cgeb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + int64_t ret = float32_to_int64(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f32(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float32_is_any_nan(v2)) { + return INT64_MIN; + } + return ret; +} + +/* convert 64-bit float to 64-bit int */ +uint64_t HELPER(cgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + int64_t ret = float64_to_int64(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f64(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), 
GETPC()); + env->cc_op = cc; + if (float64_is_any_nan(v2)) { + return INT64_MIN; + } + return ret; +} + +/* convert 128-bit float to 64-bit int */ +uint64_t HELPER(cgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 v2 = make_float128(h, l); + int64_t ret = float128_to_int64(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float128_is_any_nan(v2)) { + return INT64_MIN; + } + return ret; +} + +/* convert 32-bit float to 32-bit int */ +uint64_t HELPER(cfeb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + int32_t ret = float32_to_int32(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f32(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float32_is_any_nan(v2)) { + return INT32_MIN; + } + return ret; +} + +/* convert 64-bit float to 32-bit int */ +uint64_t HELPER(cfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + int32_t ret = float64_to_int32(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f64(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float64_is_any_nan(v2)) { + return INT32_MIN; + } + return ret; +} + +/* convert 128-bit float to 32-bit int */ +uint64_t HELPER(cfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 v2 = make_float128(h, l); + int32_t ret = float128_to_int32(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float128_is_any_nan(v2)) { + return INT32_MIN; + } + return ret; +} + +/* convert 32-bit float to 64-bit uint */ +uint64_t HELPER(clgeb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + uint64_t ret = float32_to_uint64(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f32(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float32_is_any_nan(v2)) { + return 0; + } + return ret; +} + +/* convert 64-bit float to 64-bit uint */ +uint64_t HELPER(clgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + uint64_t ret = float64_to_uint64(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f64(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float64_is_any_nan(v2)) { + return 0; + } + return ret; +} + +/* convert 128-bit float to 64-bit uint */ +uint64_t HELPER(clgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 v2 = make_float128(h, l); + uint64_t ret = float128_to_uint64(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status); + + 
s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float128_is_any_nan(v2)) { + return 0; + } + return ret; +} + +/* convert 32-bit float to 32-bit uint */ +uint64_t HELPER(clfeb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + uint32_t ret = float32_to_uint32(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f32(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float32_is_any_nan(v2)) { + return 0; + } + return ret; +} + +/* convert 64-bit float to 32-bit uint */ +uint64_t HELPER(clfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + uint32_t ret = float64_to_uint32(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f64(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float64_is_any_nan(v2)) { + return 0; + } + return ret; +} + +/* convert 128-bit float to 32-bit uint */ +uint64_t HELPER(clfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 v2 = make_float128(h, l); + uint32_t ret = float128_to_uint32(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float128_is_any_nan(v2)) { + return 0; + } + return ret; +} + +/* round to integer 32-bit */ +uint64_t HELPER(fieb)(CPUS390XState *env, uint64_t f2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float32 ret = float32_round_to_int(f2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* round to integer 64-bit */ +uint64_t HELPER(fidb)(CPUS390XState *env, uint64_t f2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float64 ret = float64_round_to_int(f2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* round to integer 128-bit */ +uint64_t HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 ret = float128_round_to_int(make_float128(ah, al), + &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return RET128(ret); +} + +/* 32-bit FP compare and signal */ +uint32_t HELPER(keb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + FloatRelation cmp = float32_compare(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return float_comp_to_cc(env, cmp); +} + +/* 64-bit FP compare and signal */ +uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + FloatRelation cmp = float64_compare(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return float_comp_to_cc(env, cmp); +} + +/* 128-bit FP compare and signal */ +uint32_t HELPER(kxb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t bh, uint64_t bl) +{ + FloatRelation cmp = float128_compare(make_float128(ah, 
al), + make_float128(bh, bl), + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return float_comp_to_cc(env, cmp); +} + +/* 32-bit FP multiply and add */ +uint64_t HELPER(maeb)(CPUS390XState *env, uint64_t f1, + uint64_t f2, uint64_t f3) +{ + float32 ret = float32_muladd(f2, f3, f1, 0, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64-bit FP multiply and add */ +uint64_t HELPER(madb)(CPUS390XState *env, uint64_t f1, + uint64_t f2, uint64_t f3) +{ + float64 ret = float64_muladd(f2, f3, f1, 0, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 32-bit FP multiply and subtract */ +uint64_t HELPER(mseb)(CPUS390XState *env, uint64_t f1, + uint64_t f2, uint64_t f3) +{ + float32 ret = float32_muladd(f2, f3, f1, float_muladd_negate_c, + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64-bit FP multiply and subtract */ +uint64_t HELPER(msdb)(CPUS390XState *env, uint64_t f1, + uint64_t f2, uint64_t f3) +{ + float64 ret = float64_muladd(f2, f3, f1, float_muladd_negate_c, + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* The rightmost bit has the number 11. */ +static inline uint16_t dcmask(int bit, bool neg) +{ + return 1 << (11 - bit - neg); +} + +#define DEF_FLOAT_DCMASK(_TYPE) \ +uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1) \ +{ \ + const bool neg = _TYPE##_is_neg(f1); \ + \ + /* Sorted by most common cases - only one class is possible */ \ + if (_TYPE##_is_normal(f1)) { \ + return dcmask(2, neg); \ + } else if (_TYPE##_is_zero(f1)) { \ + return dcmask(0, neg); \ + } else if (_TYPE##_is_denormal(f1)) { \ + return dcmask(4, neg); \ + } else if (_TYPE##_is_infinity(f1)) { \ + return dcmask(6, neg); \ + } else if (_TYPE##_is_quiet_nan(f1, &env->fpu_status)) { \ + return dcmask(8, neg); \ + } \ + /* signaling nan, as last remaining case */ \ + return dcmask(10, neg); \ +} +DEF_FLOAT_DCMASK(float32) +DEF_FLOAT_DCMASK(float64) +DEF_FLOAT_DCMASK(float128) + +/* test data class 32-bit */ +uint32_t HELPER(tceb)(CPUS390XState *env, uint64_t f1, uint64_t m2) +{ + return (m2 & float32_dcmask(env, f1)) != 0; +} + +/* test data class 64-bit */ +uint32_t HELPER(tcdb)(CPUS390XState *env, uint64_t v1, uint64_t m2) +{ + return (m2 & float64_dcmask(env, v1)) != 0; +} + +/* test data class 128-bit */ +uint32_t HELPER(tcxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t m2) +{ + return (m2 & float128_dcmask(env, make_float128(ah, al))) != 0; +} + +/* square root 32-bit */ +uint64_t HELPER(sqeb)(CPUS390XState *env, uint64_t f2) +{ + float32 ret = float32_sqrt(f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* square root 64-bit */ +uint64_t HELPER(sqdb)(CPUS390XState *env, uint64_t f2) +{ + float64 ret = float64_sqrt(f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* square root 128-bit */ +uint64_t HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al) +{ + float128 ret = float128_sqrt(make_float128(ah, al), &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +static const int fpc_to_rnd[8] = { + float_round_nearest_even, + float_round_to_zero, + float_round_up, + float_round_down, + -1, + -1, + -1, + float_round_to_odd, +}; + +/* set fpc */ +void HELPER(sfpc)(CPUS390XState *env, uint64_t fpc) +{ + if (fpc_to_rnd[fpc & 0x7] == -1 || fpc & 0x03030088u || + (!s390_has_feat(S390_FEAT_FLOATING_POINT_EXT) && fpc & 0x4)) { + 
+
+/* test data class 32-bit */
+uint32_t HELPER(tceb)(CPUS390XState *env, uint64_t f1, uint64_t m2)
+{
+    return (m2 & float32_dcmask(env, f1)) != 0;
+}
+
+/* test data class 64-bit */
+uint32_t HELPER(tcdb)(CPUS390XState *env, uint64_t v1, uint64_t m2)
+{
+    return (m2 & float64_dcmask(env, v1)) != 0;
+}
+
+/* test data class 128-bit */
+uint32_t HELPER(tcxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t m2)
+{
+    return (m2 & float128_dcmask(env, make_float128(ah, al))) != 0;
+}
+
+/* square root 32-bit */
+uint64_t HELPER(sqeb)(CPUS390XState *env, uint64_t f2)
+{
+    float32 ret = float32_sqrt(f2, &env->fpu_status);
+    handle_exceptions(env, false, GETPC());
+    return ret;
+}
+
+/* square root 64-bit */
+uint64_t HELPER(sqdb)(CPUS390XState *env, uint64_t f2)
+{
+    float64 ret = float64_sqrt(f2, &env->fpu_status);
+    handle_exceptions(env, false, GETPC());
+    return ret;
+}
+
+/* square root 128-bit */
+uint64_t HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al)
+{
+    float128 ret = float128_sqrt(make_float128(ah, al), &env->fpu_status);
+    handle_exceptions(env, false, GETPC());
+    return RET128(ret);
+}
+
+static const int fpc_to_rnd[8] = {
+    float_round_nearest_even,
+    float_round_to_zero,
+    float_round_up,
+    float_round_down,
+    -1,
+    -1,
+    -1,
+    float_round_to_odd,
+};
+
+/* set fpc */
+void HELPER(sfpc)(CPUS390XState *env, uint64_t fpc)
+{
+    if (fpc_to_rnd[fpc & 0x7] == -1 || fpc & 0x03030088u ||
+        (!s390_has_feat(S390_FEAT_FLOATING_POINT_EXT) && fpc & 0x4)) {
+        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
+    }
+
+    /* Install everything in the main FPC. */
+    env->fpc = fpc;
+
+    /* Install the rounding mode in the shadow fpu_status. */
+    set_float_rounding_mode(fpc_to_rnd[fpc & 0x7], &env->fpu_status);
+}
+
+/* set fpc and signal */
+void HELPER(sfas)(CPUS390XState *env, uint64_t fpc)
+{
+    uint32_t signalling = env->fpc;
+    uint32_t s390_exc;
+
+    if (fpc_to_rnd[fpc & 0x7] == -1 || fpc & 0x03030088u ||
+        (!s390_has_feat(S390_FEAT_FLOATING_POINT_EXT) && fpc & 0x4)) {
+        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
+    }
+
+    /*
+     * FPC is set to the FPC operand with a bitwise OR of the signalling
+     * flags.
+     */
+    env->fpc = fpc | (signalling & 0x00ff0000);
+    set_float_rounding_mode(fpc_to_rnd[fpc & 0x7], &env->fpu_status);
+
+    /*
+     * If any signaling flag is enabled in the new FPC mask, a
+     * simulated IEEE exception occurs.
+     */
+    s390_exc = (signalling >> 16) & (fpc >> 24);
+    if (s390_exc) {
+        if (s390_exc & S390_IEEE_MASK_INVALID) {
+            s390_exc = S390_IEEE_MASK_INVALID;
+        } else if (s390_exc & S390_IEEE_MASK_DIVBYZERO) {
+            s390_exc = S390_IEEE_MASK_DIVBYZERO;
+        } else if (s390_exc & S390_IEEE_MASK_OVERFLOW) {
+            s390_exc &= (S390_IEEE_MASK_OVERFLOW | S390_IEEE_MASK_INEXACT);
+        } else if (s390_exc & S390_IEEE_MASK_UNDERFLOW) {
+            s390_exc &= (S390_IEEE_MASK_UNDERFLOW | S390_IEEE_MASK_INEXACT);
+        } else if (s390_exc & S390_IEEE_MASK_INEXACT) {
+            s390_exc = S390_IEEE_MASK_INEXACT;
+        } else if (s390_exc & S390_IEEE_MASK_QUANTUM) {
+            s390_exc = S390_IEEE_MASK_QUANTUM;
+        }
+        tcg_s390_data_exception(env, s390_exc | 3, GETPC());
+    }
+}
+
+/* set bfp rounding mode */
+void HELPER(srnm)(CPUS390XState *env, uint64_t rnd)
+{
+    if (rnd > 0x7 || fpc_to_rnd[rnd & 0x7] == -1) {
+        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
+    }
+
+    env->fpc = deposit32(env->fpc, 0, 3, rnd);
+    set_float_rounding_mode(fpc_to_rnd[rnd & 0x7], &env->fpu_status);
+}
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
new file mode 100644
index 0000000000..3e5594210c
--- /dev/null
+++ b/target/s390x/tcg/insn-data.def
@@ -0,0 +1,1398 @@
+/*
+ * Arguments to the opcode prototypes
+ *
+ * C(OPC, NAME, FMT, FAC, I1, I2, P, W, OP, CC)
+ * D(OPC, NAME, FMT, FAC, I1, I2, P, W, OP, CC, DATA)
+ * E(OPC, NAME, FMT, FAC, I1, I2, P, W, OP, CC, DATA, FLAGS)
+ * F(OPC, NAME, FMT, FAC, I1, I2, P, W, OP, CC, FLAGS)
+ *
+ * OPC = (op << 8) | op2 where op is the major, op2 the minor opcode
+ * NAME = name of the opcode, used internally
+ * FMT = format of the opcode (defined in insn-format.def)
+ * FAC = facility the opcode is available in (defined in DisasFacility)
+ * I1 = func in1_xx fills o->in1
+ * I2 = func in2_xx fills o->in2
+ * P = func prep_xx initializes o->*out*
+ * W = func wout_xx writes o->*out* somewhere
+ * OP = func op_xx does the bulk of the operation
+ * CC = func cout_xx defines how cc should get set
+ * DATA = immediate argument to op_xx function
+ * FLAGS = categorize the type of instruction (e.g. for advanced checks)
+ *
+ * The helpers get called in order: I1, I2, P, OP, W, CC
+ */
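+
+/*
+ * Worked example (an assumed reading of one entry against the key
+ * above): for "C(0x1a00, AR, RR_a, Z, r1, r2, new, r1_32, add, adds32)"
+ * the translator fills o->in1 from r1 and o->in2 from r2, allocates a
+ * fresh temporary for the output ("new"), performs the op_add
+ * operation, writes the low 32 bits of the result back to r1 ("r1_32"),
+ * and finally computes the cc as for a signed 32-bit addition
+ * ("adds32").
+ */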
+
+/* ADD */
+    C(0x1a00, AR, RR_a, Z, r1, r2, new, r1_32, add, adds32)
+    C(0xb9f8, ARK, RRF_a, DO, r2, r3, new, r1_32, add, adds32)
+    C(0x5a00, A, RX_a, Z, r1, m2_32s, new, r1_32, add, adds32)
+    C(0xe35a, AY, RXY_a, LD, r1, m2_32s, new, r1_32, add, adds32)
+    C(0xb908, AGR, RRE, Z, r1, r2, r1, 0, add, adds64)
+    C(0xb918, AGFR, RRE, Z, r1, r2_32s, r1, 0, add, adds64)
+    C(0xb9e8, AGRK, RRF_a, DO, r2, r3, r1, 0, add, adds64)
+    C(0xe308, AG, RXY_a, Z, r1, m2_64, r1, 0, add, adds64)
+    C(0xe318, AGF, RXY_a, Z, r1, m2_32s, r1, 0, add, adds64)
+    F(0xb30a, AEBR, RRE, Z, e1, e2, new, e1, aeb, f32, IF_BFP)
+    F(0xb31a, ADBR, RRE, Z, f1, f2, new, f1, adb, f64, IF_BFP)
+    F(0xb34a, AXBR, RRE, Z, x2h, x2l, x1, x1, axb, f128, IF_BFP)
+    F(0xed0a, AEB, RXE, Z, e1, m2_32u, new, e1, aeb, f32, IF_BFP)
+    F(0xed1a, ADB, RXE, Z, f1, m2_64, new, f1, adb, f64, IF_BFP)
+/* ADD HIGH */
+    C(0xb9c8, AHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, add, adds32)
+    C(0xb9d8, AHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, add, adds32)
+/* ADD IMMEDIATE */
+    C(0xc209, AFI, RIL_a, EI, r1, i2, new, r1_32, add, adds32)
+    D(0xeb6a, ASI, SIY, GIE, la1, i2, new, 0, asi, adds32, MO_TESL)
+    C(0xecd8, AHIK, RIE_d, DO, r3, i2, new, r1_32, add, adds32)
+    C(0xc208, AGFI, RIL_a, EI, r1, i2, r1, 0, add, adds64)
+    D(0xeb7a, AGSI, SIY, GIE, la1, i2, new, 0, asi, adds64, MO_TEQ)
+    C(0xecd9, AGHIK, RIE_d, DO, r3, i2, r1, 0, add, adds64)
+/* ADD IMMEDIATE HIGH */
+    C(0xcc08, AIH, RIL_a, HW, r1_sr32, i2, new, r1_32h, add, adds32)
+/* ADD HALFWORD */
+    C(0x4a00, AH, RX_a, Z, r1, m2_16s, new, r1_32, add, adds32)
+    C(0xe37a, AHY, RXY_a, LD, r1, m2_16s, new, r1_32, add, adds32)
+    C(0xe338, AGH, RXY_a, MIE2,r1, m2_16s, r1, 0, add, adds64)
+/* ADD HALFWORD IMMEDIATE */
+    C(0xa70a, AHI, RI_a, Z, r1, i2, new, r1_32, add, adds32)
+    C(0xa70b, AGHI, RI_a, Z, r1, i2, r1, 0, add, adds64)
+
+/* ADD LOGICAL */
+    C(0x1e00, ALR, RR_a, Z, r1_32u, r2_32u, new, r1_32, add, addu32)
+    C(0xb9fa, ALRK, RRF_a, DO, r2_32u, r3_32u, new, r1_32, add, addu32)
+    C(0x5e00, AL, RX_a, Z, r1_32u, m2_32u, new, r1_32, add, addu32)
+    C(0xe35e, ALY, RXY_a, LD, r1_32u, m2_32u, new, r1_32, add, addu32)
+    C(0xb90a, ALGR, RRE, Z, r1, r2, r1, 0, addu64, addu64)
+    C(0xb91a, ALGFR, RRE, Z, r1, r2_32u, r1, 0, addu64, addu64)
+    C(0xb9ea, ALGRK, RRF_a, DO, r2, r3, r1, 0, addu64, addu64)
+    C(0xe30a, ALG, RXY_a, Z, r1, m2_64, r1, 0, addu64, addu64)
+    C(0xe31a, ALGF, RXY_a, Z, r1, m2_32u, r1, 0, addu64, addu64)
+/* ADD LOGICAL HIGH */
+    C(0xb9ca, ALHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, add, addu32)
+    C(0xb9da, ALHHLR, RRF_a, HW, r2_sr32, r3_32u, new, r1_32h, add, addu32)
+/* ADD LOGICAL IMMEDIATE */
+    C(0xc20b, ALFI, RIL_a, EI, r1_32u, i2_32u, new, r1_32, add, addu32)
+    C(0xc20a, ALGFI, RIL_a, EI, r1, i2_32u, r1, 0, addu64, addu64)
+/* ADD LOGICAL WITH SIGNED IMMEDIATE */
+    D(0xeb6e, ALSI, SIY, GIE, la1, i2_32u, new, 0, asi, addu32, MO_TEUL)
+    C(0xecda, ALHSIK, RIE_d, DO, r3_32u, i2_32u, new, r1_32, add, addu32)
+    D(0xeb7e, ALGSI, SIY, GIE, la1, i2, new, 0, asiu64, addu64, MO_TEQ)
+    C(0xecdb, ALGHSIK, RIE_d, DO, r3, i2, r1, 0, addu64, addu64)
+/* ADD LOGICAL WITH SIGNED IMMEDIATE HIGH */
+    C(0xcc0a, ALSIH, RIL_a, HW, r1_sr32, i2_32u, new, r1_32h, add, addu32)
+    C(0xcc0b, ALSIHN, RIL_a, HW, r1_sr32, i2_32u, new, r1_32h, add, 0)
+/* ADD LOGICAL WITH CARRY */
+    C(0xb998, ALCR, RRE, Z, r1_32u, r2_32u, new, r1_32, addc32, addu32)
+    C(0xb988, ALCGR, RRE, Z, r1, r2, r1, 0, addc64, addu64)
+    C(0xe398, ALC, RXY_a, Z, r1_32u, m2_32u, new, r1_32, addc32, addu32)
+    C(0xe388, ALCG, RXY_a, Z, r1, m2_64, r1, 0, addc64, addu64)
+
+/* AND */
+    C(0x1400, NR, RR_a, Z, r1, r2, new, r1_32, and, nz32)
+    C(0xb9f4, NRK, RRF_a, DO, r2, r3, new, r1_32, and, nz32)
+    C(0x5400, N, RX_a, Z, r1, m2_32s, new, r1_32, and, nz32)
+    C(0xe354, NY, RXY_a, LD, r1, m2_32s, new, r1_32, and, nz32)
+    C(0xb980, NGR, RRE, Z, r1, r2, r1, 0, and, nz64)
+    C(0xb9e4, NGRK, RRF_a, DO, r2, r3, r1, 0, and, nz64)
+    C(0xe380, NG, RXY_a, Z, r1, m2_64, r1, 0, and, nz64)
+    C(0xd400, NC, SS_a, Z, la1, a2, 0, 0, nc, 0)
+/* AND IMMEDIATE */
+    D(0xc00a, NIHF, RIL_a, EI, r1_o, i2_32u, r1, 0, andi, 0, 0x2020)
+    D(0xc00b, NILF, RIL_a, EI, r1_o, i2_32u, r1, 0, andi, 0, 0x2000)
+    D(0xa504, NIHH, RI_a, Z, r1_o, i2_16u, r1, 0, andi, 0, 0x1030)
+    D(0xa505, NIHL, RI_a, Z, r1_o, i2_16u, r1, 0, andi, 0, 0x1020)
+    D(0xa506, NILH, RI_a, Z, r1_o, i2_16u, r1, 0, andi, 0, 0x1010)
+    D(0xa507, NILL, RI_a, Z, r1_o, i2_16u, r1, 0, andi, 0, 0x1000)
+    D(0x9400, NI, SI, Z, la1, i2_8u, new, 0, ni, nz64, MO_UB)
+    D(0xeb54, NIY, SIY, LD, la1, i2_8u, new, 0, ni, nz64, MO_UB)
+
+/* BRANCH AND LINK */
+    C(0x0500, BALR, RR_a, Z, 0, r2_nz, r1, 0, bal, 0)
+    C(0x4500, BAL, RX_a, Z, 0, a2, r1, 0, bal, 0)
+/* BRANCH AND SAVE */
+    C(0x0d00, BASR, RR_a, Z, 0, r2_nz, r1, 0, bas, 0)
+    C(0x4d00, BAS, RX_a, Z, 0, a2, r1, 0, bas, 0)
+/* BRANCH RELATIVE AND SAVE */
+    C(0xa705, BRAS, RI_b, Z, 0, 0, r1, 0, basi, 0)
+    C(0xc005, BRASL, RIL_b, Z, 0, 0, r1, 0, basi, 0)
+/* BRANCH INDIRECT ON CONDITION */
+    C(0xe347, BIC, RXY_b, MIE2,0, m2_64w, 0, 0, bc, 0)
+/* BRANCH ON CONDITION */
+    C(0x0700, BCR, RR_b, Z, 0, r2_nz, 0, 0, bc, 0)
+    C(0x4700, BC, RX_b, Z, 0, a2, 0, 0, bc, 0)
+/* BRANCH RELATIVE ON CONDITION */
+    C(0xa704, BRC, RI_c, Z, 0, 0, 0, 0, bc, 0)
+    C(0xc004, BRCL, RIL_c, Z, 0, 0, 0, 0, bc, 0)
+/* BRANCH ON COUNT */
+    C(0x0600, BCTR, RR_a, Z, 0, r2_nz, 0, 0, bct32, 0)
+    C(0xb946, BCTGR, RRE, Z, 0, r2_nz, 0, 0, bct64, 0)
+    C(0x4600, BCT, RX_a, Z, 0, a2, 0, 0, bct32, 0)
+    C(0xe346, BCTG, RXY_a, Z, 0, a2, 0, 0, bct64, 0)
+/* BRANCH RELATIVE ON COUNT */
+    C(0xa706, BRCT, RI_b, Z, 0, 0, 0, 0, bct32, 0)
+    C(0xa707, BRCTG, RI_b, Z, 0, 0, 0, 0, bct64, 0)
+/* BRANCH RELATIVE ON COUNT HIGH */
+    C(0xcc06, BRCTH, RIL_b, HW, 0, 0, 0, 0, bcth, 0)
+/* BRANCH ON INDEX */
+    D(0x8600, BXH, RS_a, Z, 0, a2, 0, 0, bx32, 0, 0)
+    D(0x8700, BXLE, RS_a, Z, 0, a2, 0, 0, bx32, 0, 1)
+    D(0xeb44, BXHG, RSY_a, Z, 0, a2, 0, 0, bx64, 0, 0)
+    D(0xeb45, BXLEG, RSY_a, Z, 0, a2, 0, 0, bx64, 0, 1)
+/* BRANCH RELATIVE ON INDEX */
+    D(0x8400, BRXH, RSI, Z, 0, 0, 0, 0, bx32, 0, 0)
+    D(0x8500, BRXLE, RSI, Z, 0, 0, 0, 0, bx32, 0, 1)
+    D(0xec44, BRXHG, RIE_e, Z, 0, 0, 0, 0, bx64, 0, 0)
+    D(0xec45, BRXHLE, RIE_e, Z, 0, 0, 0, 0, bx64, 0, 1)
+/* BRANCH PREDICTION PRELOAD */
+    /* ??? Format is SMI, but implemented as NOP, so we need no fields. */
+    C(0xc700, BPP, E, EH, 0, 0, 0, 0, 0, 0)
+/* BRANCH PREDICTION RELATIVE PRELOAD */
+    /* ??? Format is MII, but implemented as NOP, so we need no fields. */
+    C(0xc500, BPRP, E, EH, 0, 0, 0, 0, 0, 0)
+/* NEXT INSTRUCTION ACCESS INTENT */
+    /* ??? Format is IE, but implemented as NOP, so we need no fields. */
+    C(0xb2fa, NIAI, E, EH, 0, 0, 0, 0, 0, 0)
+
+/* CHECKSUM */
+    C(0xb241, CKSM, RRE, Z, r1_o, ra2, new, r1_32, cksm, 0)
+
+/* COPY SIGN */
+    F(0xb372, CPSDR, RRF_b, FPSSH, f3, f2, new, f1, cps, 0, IF_AFP1 | IF_AFP2 | IF_AFP3)
+
+/* COMPARE */
+    C(0x1900, CR, RR_a, Z, r1_o, r2_o, 0, 0, 0, cmps32)
+    C(0x5900, C, RX_a, Z, r1_o, m2_32s, 0, 0, 0, cmps32)
+    C(0xe359, CY, RXY_a, LD, r1_o, m2_32s, 0, 0, 0, cmps32)
+    C(0xb920, CGR, RRE, Z, r1_o, r2_o, 0, 0, 0, cmps64)
+    C(0xb930, CGFR, RRE, Z, r1_o, r2_32s, 0, 0, 0, cmps64)
+    C(0xe320, CG, RXY_a, Z, r1_o, m2_64, 0, 0, 0, cmps64)
+    C(0xe330, CGF, RXY_a, Z, r1_o, m2_32s, 0, 0, 0, cmps64)
+    F(0xb309, CEBR, RRE, Z, e1, e2, 0, 0, ceb, 0, IF_BFP)
+    F(0xb319, CDBR, RRE, Z, f1, f2, 0, 0, cdb, 0, IF_BFP)
+    F(0xb349, CXBR, RRE, Z, x2h, x2l, x1, 0, cxb, 0, IF_BFP)
+    F(0xed09, CEB, RXE, Z, e1, m2_32u, 0, 0, ceb, 0, IF_BFP)
+    F(0xed19, CDB, RXE, Z, f1, m2_64, 0, 0, cdb, 0, IF_BFP)
+/* COMPARE AND SIGNAL */
+    F(0xb308, KEBR, RRE, Z, e1, e2, 0, 0, keb, 0, IF_BFP)
+    F(0xb318, KDBR, RRE, Z, f1, f2, 0, 0, kdb, 0, IF_BFP)
+    F(0xb348, KXBR, RRE, Z, x2h, x2l, x1, 0, kxb, 0, IF_BFP)
+    F(0xed08, KEB, RXE, Z, e1, m2_32u, 0, 0, keb, 0, IF_BFP)
+    F(0xed18, KDB, RXE, Z, f1, m2_64, 0, 0, kdb, 0, IF_BFP)
+/* COMPARE IMMEDIATE */
+    C(0xc20d, CFI, RIL_a, EI, r1, i2, 0, 0, 0, cmps32)
+    C(0xc20c, CGFI, RIL_a, EI, r1, i2, 0, 0, 0, cmps64)
+/* COMPARE RELATIVE LONG */
+    C(0xc60d, CRL, RIL_b, GIE, r1, mri2_32s, 0, 0, 0, cmps32)
+    C(0xc608, CGRL, RIL_b, GIE, r1, mri2_64, 0, 0, 0, cmps64)
+    C(0xc60c, CGFRL, RIL_b, GIE, r1, mri2_32s, 0, 0, 0, cmps64)
+/* COMPARE HALFWORD */
+    C(0x4900, CH, RX_a, Z, r1_o, m2_16s, 0, 0, 0, cmps32)
+    C(0xe379, CHY, RXY_a, LD, r1_o, m2_16s, 0, 0, 0, cmps32)
+    C(0xe334, CGH, RXY_a, GIE, r1_o, m2_16s, 0, 0, 0, cmps64)
+/* COMPARE HALFWORD IMMEDIATE */
+    C(0xa70e, CHI, RI_a, Z, r1_o, i2, 0, 0, 0, cmps32)
+    C(0xa70f, CGHI, RI_a, Z, r1_o, i2, 0, 0, 0, cmps64)
+    C(0xe554, CHHSI, SIL, GIE, m1_16s, i2, 0, 0, 0, cmps64)
+    C(0xe55c, CHSI, SIL, GIE, m1_32s, i2, 0, 0, 0, cmps64)
+    C(0xe558, CGHSI, SIL, GIE, m1_64, i2, 0, 0, 0, cmps64)
+/* COMPARE HALFWORD RELATIVE LONG */
+    C(0xc605, CHRL, RIL_b, GIE, r1_o, mri2_32s, 0, 0, 0, cmps32)
+    C(0xc604, CGHRL, RIL_b, GIE, r1_o, mri2_64, 0, 0, 0, cmps64)
+/* COMPARE HIGH */
+    C(0xb9cd, CHHR, RRE, HW, r1_sr32, r2_sr32, 0, 0, 0, cmps32)
+    C(0xb9dd, CHLR, RRE, HW, r1_sr32, r2_o, 0, 0, 0, cmps32)
+    C(0xe3cd, CHF, RXY_a, HW, r1_sr32, m2_32s, 0, 0, 0, cmps32)
+/* COMPARE IMMEDIATE HIGH */
+    C(0xcc0d, CIH, RIL_a, HW, r1_sr32, i2, 0, 0, 0, cmps32)
+
+/* COMPARE LOGICAL */
+    C(0x1500, CLR, RR_a, Z, r1, r2, 0, 0, 0, cmpu32)
+    C(0x5500, CL, RX_a, Z, r1, m2_32s, 0, 0, 0, cmpu32)
+    C(0xe355, CLY, RXY_a, LD, r1, m2_32s, 0, 0, 0, cmpu32)
+    C(0xb921, CLGR, RRE, Z, r1, r2, 0, 0, 0, cmpu64)
+    C(0xb931, CLGFR, RRE, Z, r1, r2_32u, 0, 0, 0, cmpu64)
+    C(0xe321, CLG, RXY_a, Z, r1, m2_64, 0, 0, 0, cmpu64)
+    C(0xe331, CLGF, RXY_a, Z, r1, m2_32u, 0, 0, 0, cmpu64)
+    C(0xd500, CLC, SS_a, Z, la1, a2, 0, 0, clc, 0)
+/* COMPARE LOGICAL HIGH */
+    C(0xb9cf, CLHHR, RRE, HW, r1_sr32, r2_sr32, 0, 0, 0, cmpu32)
+    C(0xb9df, CLHLR, RRE, HW, r1_sr32, r2_o, 0, 0, 0, cmpu32)
+    C(0xe3cf, CLHF, RXY_a, HW, r1_sr32, m2_32s, 0, 0, 0, cmpu32)
+/* COMPARE LOGICAL IMMEDIATE */
+    C(0xc20f, CLFI, RIL_a, EI, r1, i2, 0, 0, 0, cmpu32)
+    C(0xc20e, CLGFI, RIL_a, EI, r1, i2_32u, 0, 0, 0, cmpu64)
+    C(0x9500, CLI, SI, Z, m1_8u, i2_8u, 0, 0, 0, cmpu64)
+    C(0xeb55, CLIY, SIY, LD, m1_8u, i2_8u, 0, 0, 0, cmpu64)
+    C(0xe555, CLHHSI, SIL, GIE, m1_16u, i2_16u, 0, 0, 0, cmpu64)
+    C(0xe55d, CLFHSI, SIL, GIE, m1_32u, i2_16u, 0, 0, 0, cmpu64)
+    C(0xe559, CLGHSI, SIL, GIE, m1_64, i2_16u, 0, 0, 0, cmpu64)
+/* COMPARE LOGICAL IMMEDIATE HIGH */
+    C(0xcc0f, CLIH, RIL_a, HW, r1_sr32, i2, 0, 0, 0, cmpu32)
+/* COMPARE LOGICAL RELATIVE LONG */
+    C(0xc60f, CLRL, RIL_b, GIE, r1_o, mri2_32u, 0, 0, 0, cmpu32)
+    C(0xc60a, CLGRL, RIL_b, GIE, r1_o, mri2_64, 0, 0, 0, cmpu64)
+    C(0xc60e, CLGFRL, RIL_b, GIE, r1_o, mri2_32u, 0, 0, 0, cmpu64)
+    C(0xc607, CLHRL, RIL_b, GIE, r1_o, mri2_16u, 0, 0, 0, cmpu32)
+    C(0xc606, CLGHRL, RIL_b, GIE, r1_o, mri2_16u, 0, 0, 0, cmpu64)
+/* COMPARE LOGICAL LONG */
+    C(0x0f00, CLCL, RR_a, Z, 0, 0, 0, 0, clcl, 0)
+/* COMPARE LOGICAL LONG EXTENDED */
+    C(0xa900, CLCLE, RS_a, Z, 0, a2, 0, 0, clcle, 0)
+/* COMPARE LOGICAL LONG UNICODE */
+    C(0xeb8f, CLCLU, RSY_a, E2, 0, a2, 0, 0, clclu, 0)
+/* COMPARE LOGICAL CHARACTERS UNDER MASK */
+    C(0xbd00, CLM, RS_b, Z, r1_o, a2, 0, 0, clm, 0)
+    C(0xeb21, CLMY, RSY_b, LD, r1_o, a2, 0, 0, clm, 0)
+    C(0xeb20, CLMH, RSY_b, Z, r1_sr32, a2, 0, 0, clm, 0)
+/* COMPARE LOGICAL STRING */
+    C(0xb25d, CLST, RRE, Z, r1_o, r2_o, 0, 0, clst, 0)
+
+/* COMPARE AND BRANCH */
+    D(0xecf6, CRB, RRS, GIE, r1_32s, r2_32s, 0, 0, cj, 0, 0)
+    D(0xece4, CGRB, RRS, GIE, r1_o, r2_o, 0, 0, cj, 0, 0)
+    D(0xec76, CRJ, RIE_b, GIE, r1_32s, r2_32s, 0, 0, cj, 0, 0)
+    D(0xec64, CGRJ, RIE_b, GIE, r1_o, r2_o, 0, 0, cj, 0, 0)
+    D(0xecfe, CIB, RIS, GIE, r1_32s, i2, 0, 0, cj, 0, 0)
+    D(0xecfc, CGIB, RIS, GIE, r1_o, i2, 0, 0, cj, 0, 0)
+    D(0xec7e, CIJ, RIE_c, GIE, r1_32s, i2, 0, 0, cj, 0, 0)
+    D(0xec7c, CGIJ, RIE_c, GIE, r1_o, i2, 0, 0, cj, 0, 0)
+/* COMPARE LOGICAL AND BRANCH */
+    D(0xecf7, CLRB, RRS, GIE, r1_32u, r2_32u, 0, 0, cj, 0, 1)
+    D(0xece5, CLGRB, RRS, GIE, r1_o, r2_o, 0, 0, cj, 0, 1)
+    D(0xec77, CLRJ, RIE_b, GIE, r1_32u, r2_32u, 0, 0, cj, 0, 1)
+    D(0xec65, CLGRJ, RIE_b, GIE, r1_o, r2_o, 0, 0, cj, 0, 1)
+    D(0xecff, CLIB, RIS, GIE, r1_32u, i2_8u, 0, 0, cj, 0, 1)
+    D(0xecfd, CLGIB, RIS, GIE, r1_o, i2_8u, 0, 0, cj, 0, 1)
+    D(0xec7f, CLIJ, RIE_c, GIE, r1_32u, i2_8u, 0, 0, cj, 0, 1)
+    D(0xec7d, CLGIJ, RIE_c, GIE, r1_o, i2_8u, 0, 0, cj, 0, 1)
+
+/* COMPARE AND SWAP */
+    D(0xba00, CS, RS_a, Z, r3_32u, r1_32u, new, r1_32, cs, 0, MO_TEUL)
+    D(0xeb14, CSY, RSY_a, LD, r3_32u, r1_32u, new, r1_32, cs, 0, MO_TEUL)
+    D(0xeb30, CSG, RSY_a, Z, r3_o, r1_o, new, r1, cs, 0, MO_TEQ)
+/* COMPARE DOUBLE AND SWAP */
+    D(0xbb00, CDS, RS_a, Z, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ)
+    D(0xeb31, CDSY, RSY_a, LD, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ)
+    C(0xeb3e, CDSG, RSY_a, Z, 0, 0, 0, 0, cdsg, 0)
+/* COMPARE AND SWAP AND STORE */
+    C(0xc802, CSST, SSF, CASS, la1, a2, 0, 0, csst, 0)
+
+/* COMPARE AND TRAP */
+    D(0xb972, CRT, RRF_c, GIE, r1_32s, r2_32s, 0, 0, ct, 0, 0)
+    D(0xb960, CGRT, RRF_c, GIE, r1_o, r2_o, 0, 0, ct, 0, 0)
+    D(0xec72, CIT, RIE_a, GIE, r1_32s, i2, 0, 0, ct, 0, 0)
+    D(0xec70, CGIT, RIE_a, GIE, r1_o, i2, 0, 0, ct, 0, 0)
+/* COMPARE LOGICAL AND TRAP */
+    D(0xb973, CLRT, RRF_c, GIE, r1_32u, r2_32u, 0, 0, ct, 0, 1)
+    D(0xb961, CLGRT, RRF_c, GIE, r1_o, r2_o, 0, 0, ct, 0, 1)
+    D(0xeb23, CLT, RSY_b, MIE, r1_32u, m2_32u, 0, 0, ct, 0, 1)
+    D(0xeb2b, CLGT, RSY_b, MIE, r1_o, m2_64, 0, 0, ct, 0, 1)
+    D(0xec73, CLFIT, RIE_a, GIE, r1_32u, i2_32u, 0, 0, ct, 0, 1)
+    D(0xec71, CLGIT, RIE_a, GIE, r1_o, i2_32u, 0, 0, ct, 0, 1)
+
+/* CONVERT TO DECIMAL */
+    C(0x4e00, CVD, RX_a, Z, r1_o, a2, 0, 0, cvd, 0)
+    C(0xe326, CVDY, RXY_a, LD, r1_o, a2, 0, 0, cvd, 0)
+/* CONVERT TO FIXED */
+    F(0xb398, CFEBR, RRF_e, Z, 0, e2, new, r1_32, cfeb, 0, IF_BFP)
+    F(0xb399, CFDBR, RRF_e, Z, 0, f2, new, r1_32, cfdb, 0, IF_BFP)
+    F(0xb39a, CFXBR, RRF_e, Z, x2h, x2l, new, r1_32, cfxb, 0, IF_BFP)
+    F(0xb3a8, CGEBR, RRF_e, Z, 0, e2, r1, 0, cgeb, 0, IF_BFP)
+    F(0xb3a9, CGDBR, RRF_e, Z, 0, f2, r1, 0, cgdb, 0, IF_BFP)
+    F(0xb3aa, CGXBR, RRF_e, Z, x2h, x2l, r1, 0, cgxb, 0, IF_BFP)
+/* CONVERT FROM FIXED */
+    F(0xb394, CEFBR, RRF_e, Z, 0, r2_32s, new, e1, cegb, 0, IF_BFP)
+    F(0xb395, CDFBR, RRF_e, Z, 0, r2_32s, new, f1, cdgb, 0, IF_BFP)
+    F(0xb396, CXFBR, RRF_e, Z, 0, r2_32s, new_P, x1, cxgb, 0, IF_BFP)
+    F(0xb3a4, CEGBR, RRF_e, Z, 0, r2_o, new, e1, cegb, 0, IF_BFP)
+    F(0xb3a5, CDGBR, RRF_e, Z, 0, r2_o, new, f1, cdgb, 0, IF_BFP)
+    F(0xb3a6, CXGBR, RRF_e, Z, 0, r2_o, new_P, x1, cxgb, 0, IF_BFP)
+/* CONVERT TO LOGICAL */
+    F(0xb39c, CLFEBR, RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0, IF_BFP)
+    F(0xb39d, CLFDBR, RRF_e, FPE, 0, f2, new, r1_32, clfdb, 0, IF_BFP)
+    F(0xb39e, CLFXBR, RRF_e, FPE, x2h, x2l, new, r1_32, clfxb, 0, IF_BFP)
+    F(0xb3ac, CLGEBR, RRF_e, FPE, 0, e2, r1, 0, clgeb, 0, IF_BFP)
+    F(0xb3ad, CLGDBR, RRF_e, FPE, 0, f2, r1, 0, clgdb, 0, IF_BFP)
+    F(0xb3ae, CLGXBR, RRF_e, FPE, x2h, x2l, r1, 0, clgxb, 0, IF_BFP)
+/* CONVERT FROM LOGICAL */
+    F(0xb390, CELFBR, RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0, IF_BFP)
+    F(0xb391, CDLFBR, RRF_e, FPE, 0, r2_32u, new, f1, cdlgb, 0, IF_BFP)
+    F(0xb392, CXLFBR, RRF_e, FPE, 0, r2_32u, new_P, x1, cxlgb, 0, IF_BFP)
+    F(0xb3a0, CELGBR, RRF_e, FPE, 0, r2_o, new, e1, celgb, 0, IF_BFP)
+    F(0xb3a1, CDLGBR, RRF_e, FPE, 0, r2_o, new, f1, cdlgb, 0, IF_BFP)
+    F(0xb3a2, CXLGBR, RRF_e, FPE, 0, r2_o, new_P, x1, cxlgb, 0, IF_BFP)
+
+/* CONVERT UTF-8 TO UTF-16 */
+    D(0xb2a7, CU12, RRF_c, Z, 0, 0, 0, 0, cuXX, 0, 12)
+/* CONVERT UTF-8 TO UTF-32 */
+    D(0xb9b0, CU14, RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 14)
+/* CONVERT UTF-16 TO UTF-8 */
+    D(0xb2a6, CU21, RRF_c, Z, 0, 0, 0, 0, cuXX, 0, 21)
+/* CONVERT UTF-16 TO UTF-32 */
+    D(0xb9b1, CU24, RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 24)
+/* CONVERT UTF-32 TO UTF-8 */
+    D(0xb9b2, CU41, RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 41)
+/* CONVERT UTF-32 TO UTF-16 */
+    D(0xb9b3, CU42, RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 42)
+
+/* DIVIDE */
+    C(0x1d00, DR, RR_a, Z, r1_D32, r2_32s, new_P, r1_P32, divs32, 0)
+    C(0x5d00, D, RX_a, Z, r1_D32, m2_32s, new_P, r1_P32, divs32, 0)
+    F(0xb30d, DEBR, RRE, Z, e1, e2, new, e1, deb, 0, IF_BFP)
+    F(0xb31d, DDBR, RRE, Z, f1, f2, new, f1, ddb, 0, IF_BFP)
+    F(0xb34d, DXBR, RRE, Z, x2h, x2l, x1, x1, dxb, 0, IF_BFP)
+    F(0xed0d, DEB, RXE, Z, e1, m2_32u, new, e1, deb, 0, IF_BFP)
+    F(0xed1d, DDB, RXE, Z, f1, m2_64, new, f1, ddb, 0, IF_BFP)
+/* DIVIDE LOGICAL */
+    C(0xb997, DLR, RRE, Z, r1_D32, r2_32u, new_P, r1_P32, divu32, 0)
+    C(0xe397, DL, RXY_a, Z, r1_D32, m2_32u, new_P, r1_P32, divu32, 0)
+    C(0xb987, DLGR, RRE, Z, 0, r2_o, r1_P, 0, divu64, 0)
+    C(0xe387, DLG, RXY_a, Z, 0, m2_64, r1_P, 0, divu64, 0)
+/* DIVIDE SINGLE */
+    C(0xb90d, DSGR, RRE, Z, r1p1, r2, r1_P, 0, divs64, 0)
+    C(0xb91d, DSGFR, RRE, Z, r1p1, r2_32s, r1_P, 0, divs64, 0)
+    C(0xe30d, DSG, RXY_a, Z, r1p1, m2_64, r1_P, 0, divs64, 0)
+    C(0xe31d, DSGF, RXY_a, Z, r1p1, m2_32s, r1_P, 0, divs64, 0)
+
+/* EXCLUSIVE OR */
+    C(0x1700, XR, RR_a, Z, r1, r2, new, r1_32, xor, nz32)
+    C(0xb9f7, XRK, RRF_a, DO, r2, r3, new, r1_32, xor, nz32)
+    C(0x5700, X, RX_a, Z, r1, m2_32s, new, r1_32, xor, nz32)
+    C(0xe357, XY, RXY_a, LD, r1, m2_32s, new, r1_32, xor, nz32)
+    C(0xb982, XGR, RRE, Z, r1, r2, r1, 0, xor, nz64)
+    C(0xb9e7, XGRK, RRF_a, DO, r2, r3, r1, 0, xor, nz64)
+    C(0xe382, XG, RXY_a, Z, r1, m2_64, r1, 0, xor, nz64)
+    C(0xd700, XC, SS_a, Z, 0, 0, 0, 0, xc, 0)
+/* EXCLUSIVE OR IMMEDIATE */
+    D(0xc006, XIHF, RIL_a, EI, r1_o, i2_32u, r1, 0, xori, 0, 0x2020)
+    D(0xc007, XILF, RIL_a, EI, r1_o, i2_32u, r1, 0, xori, 0, 0x2000)
+    D(0x9700, XI, SI, Z, la1, i2_8u, new, 0, xi, nz64, MO_UB)
+    D(0xeb57, XIY, SIY, LD, la1, i2_8u, new, 0, xi, nz64, MO_UB)
+
+/* EXECUTE */
+    C(0x4400, EX, RX_a, Z, 0, a2, 0, 0, ex, 0)
+/* EXECUTE RELATIVE LONG */
+    C(0xc600, EXRL, RIL_b, EE, 0, ri2, 0, 0, ex, 0)
+
+/* EXTRACT ACCESS */
+    C(0xb24f, EAR, RRE, Z, 0, 0, new, r1_32, ear, 0)
+/* EXTRACT CPU ATTRIBUTE */
+    C(0xeb4c, ECAG, RSY_a, GIE, 0, a2, r1, 0, ecag, 0)
+/* EXTRACT CPU TIME */
+    F(0xc801, ECTG, SSF, ECT, 0, 0, 0, 0, ectg, 0, IF_IO)
+/* EXTRACT FPC */
+    F(0xb38c, EFPC, RRE, Z, 0, 0, new, r1_32, efpc, 0, IF_BFP)
+/* EXTRACT PSW */
+    C(0xb98d, EPSW, RRE, Z, 0, 0, 0, 0, epsw, 0)
+
+/* FIND LEFTMOST ONE */
+    C(0xb983, FLOGR, RRE, EI, 0, r2_o, r1_P, 0, flogr, 0)
+
+/* INSERT CHARACTER */
+    C(0x4300, IC, RX_a, Z, 0, m2_8u, 0, r1_8, mov2, 0)
+    C(0xe373, ICY, RXY_a, LD, 0, m2_8u, 0, r1_8, mov2, 0)
+/* INSERT CHARACTERS UNDER MASK */
+    D(0xbf00, ICM, RS_b, Z, 0, a2, r1, 0, icm, 0, 0)
+    D(0xeb81, ICMY, RSY_b, LD, 0, a2, r1, 0, icm, 0, 0)
+    D(0xeb80, ICMH, RSY_b, Z, 0, a2, r1, 0, icm, 0, 32)
+/* INSERT IMMEDIATE */
+    D(0xc008, IIHF, RIL_a, EI, r1_o, i2_32u, r1, 0, insi, 0, 0x2020)
+    D(0xc009, IILF, RIL_a, EI, r1_o, i2_32u, r1, 0, insi, 0, 0x2000)
+    D(0xa500, IIHH, RI_a, Z, r1_o, i2_16u, r1, 0, insi, 0, 0x1030)
+    D(0xa501, IIHL, RI_a, Z, r1_o, i2_16u, r1, 0, insi, 0, 0x1020)
+    D(0xa502, IILH, RI_a, Z, r1_o, i2_16u, r1, 0, insi, 0, 0x1010)
+    D(0xa503, IILL, RI_a, Z, r1_o, i2_16u, r1, 0, insi, 0, 0x1000)
+/* INSERT PROGRAM MASK */
+    C(0xb222, IPM, RRE, Z, 0, 0, r1, 0, ipm, 0)
+
+/* LOAD */
+    C(0x1800, LR, RR_a, Z, 0, r2_o, 0, cond_r1r2_32, mov2, 0)
+    C(0x5800, L, RX_a, Z, 0, a2, new, r1_32, ld32s, 0)
+    C(0xe358, LY, RXY_a, LD, 0, a2, new, r1_32, ld32s, 0)
+    C(0xb904, LGR, RRE, Z, 0, r2_o, 0, r1, mov2, 0)
+    C(0xb914, LGFR, RRE, Z, 0, r2_32s, 0, r1, mov2, 0)
+    C(0xe304, LG, RXY_a, Z, 0, a2, r1, 0, ld64, 0)
+    C(0xe314, LGF, RXY_a, Z, 0, a2, r1, 0, ld32s, 0)
+    F(0x2800, LDR, RR_a, Z, 0, f2, 0, f1, mov2, 0, IF_AFP1 | IF_AFP2)
+    F(0x6800, LD, RX_a, Z, 0, m2_64, 0, f1, mov2, 0, IF_AFP1)
+    F(0xed65, LDY, RXY_a, LD, 0, m2_64, 0, f1, mov2, 0, IF_AFP1)
+    F(0x3800, LER, RR_a, Z, 0, e2, 0, cond_e1e2, mov2, 0, IF_AFP1 | IF_AFP2)
+    F(0x7800, LE, RX_a, Z, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1)
+    F(0xed64, LEY, RXY_a, LD, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1)
+    F(0xb365, LXR, RRE, Z, x2h, x2l, 0, x1, movx, 0, IF_AFP1)
+/* LOAD IMMEDIATE */
+    C(0xc001, LGFI, RIL_a, EI, 0, i2, 0, r1, mov2, 0)
+/* LOAD RELATIVE LONG */
+    C(0xc40d, LRL, RIL_b, GIE, 0, ri2, new, r1_32, ld32s, 0)
+    C(0xc408, LGRL, RIL_b, GIE, 0, ri2, r1, 0, ld64, 0)
+    C(0xc40c, LGFRL, RIL_b, GIE, 0, ri2, r1, 0, ld32s, 0)
+/* LOAD ADDRESS */
+    C(0x4100, LA, RX_a, Z, 0, a2, 0, r1, mov2, 0)
+    C(0xe371, LAY, RXY_a, LD, 0, a2, 0, r1, mov2, 0)
+/* LOAD ADDRESS EXTENDED */
+    C(0x5100, LAE, RX_a, Z, 0, a2, 0, r1, mov2e, 0)
+    C(0xe375, LAEY, RXY_a, GIE, 0, a2, 0, r1, mov2e, 0)
+/* LOAD ADDRESS RELATIVE LONG */
+    C(0xc000, LARL, RIL_b, Z, 0, ri2, 0, r1, mov2, 0)
+/* LOAD AND ADD */
+    D(0xebf8, LAA, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, laa, adds32, MO_TESL)
+    D(0xebe8, LAAG, RSY_a, ILA, r3, a2, new, in2_r1, laa, adds64, MO_TEQ)
+/* LOAD AND ADD LOGICAL */
+    D(0xebfa, LAAL, RSY_a, ILA, r3_32u, a2, new, in2_r1_32, laa, addu32, MO_TEUL)
+    D(0xebea, LAALG, RSY_a, ILA, r3, a2, new, in2_r1, laa, addu64, MO_TEQ)
+/* LOAD AND AND */
+    D(0xebf4, LAN, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lan, nz32, MO_TESL)
+    D(0xebe4, LANG, RSY_a, ILA, r3, a2, new, in2_r1, lan, nz64, MO_TEQ)
+/* LOAD AND EXCLUSIVE OR */
+    D(0xebf7, LAX, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lax, nz32, MO_TESL)
+    D(0xebe7, LAXG, RSY_a, ILA, r3, a2, new, in2_r1, lax, nz64, MO_TEQ)
+/* LOAD AND OR */
+    D(0xebf6, LAO, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lao, nz32, MO_TESL)
+    D(0xebe6, LAOG, RSY_a, ILA, r3, a2, new, in2_r1, lao, nz64, MO_TEQ)
+/* LOAD AND TEST */
+    C(0x1200, LTR, RR_a, Z, 0, r2_o, 0, cond_r1r2_32, mov2, s32)
+    C(0xb902, LTGR, RRE, Z, 0, r2_o, 0, r1, mov2, s64)
+    C(0xb912, LTGFR, RRE, Z, 0, r2_32s, 0, r1, mov2, s64)
+    C(0xe312, LT, RXY_a, EI, 0, a2, new, r1_32, ld32s, s64)
+    C(0xe302, LTG, RXY_a, EI, 0, a2, r1, 0, ld64, s64)
+    C(0xe332, LTGF, RXY_a, GIE, 0, a2, r1, 0, ld32s, s64)
+    F(0xb302, LTEBR, RRE, Z, 0, e2, 0, cond_e1e2, mov2, f32, IF_BFP)
+    F(0xb312, LTDBR, RRE, Z, 0, f2, 0, f1, mov2, f64, IF_BFP)
+    F(0xb342, LTXBR, RRE, Z, x2h, x2l, 0, x1, movx, f128, IF_BFP)
+/* LOAD AND TRAP */
+    C(0xe39f, LAT, RXY_a, LAT, 0, m2_32u, r1, 0, lat, 0)
+    C(0xe385, LGAT, RXY_a, LAT, 0, a2, r1, 0, lgat, 0)
+/* LOAD AND ZERO RIGHTMOST BYTE */
+    C(0xe3eb, LZRF, RXY_a, LZRB, 0, m2_32u, new, r1_32, lzrb, 0)
+    C(0xe32a, LZRG, RXY_a, LZRB, 0, m2_64, r1, 0, lzrb, 0)
+/* LOAD LOGICAL AND ZERO RIGHTMOST BYTE */
+    C(0xe33a, LLZRGF, RXY_a, LZRB, 0, m2_32u, r1, 0, lzrb, 0)
+/* LOAD BYTE */
+    C(0xb926, LBR, RRE, EI, 0, r2_8s, 0, r1_32, mov2, 0)
+    C(0xb906, LGBR, RRE, EI, 0, r2_8s, 0, r1, mov2, 0)
+    C(0xe376, LB, RXY_a, LD, 0, a2, new, r1_32, ld8s, 0)
+    C(0xe377, LGB, RXY_a, LD, 0, a2, r1, 0, ld8s, 0)
+/* LOAD BYTE HIGH */
+    C(0xe3c0, LBH, RXY_a, HW, 0, a2, new, r1_32h, ld8s, 0)
+/* LOAD COMPLEMENT */
+    C(0x1300, LCR, RR_a, Z, 0, r2, new, r1_32, neg, neg32)
+    C(0xb903, LCGR, RRE, Z, 0, r2, r1, 0, neg, neg64)
+    C(0xb913, LCGFR, RRE, Z, 0, r2_32s, r1, 0, neg, neg64)
+    F(0xb303, LCEBR, RRE, Z, 0, e2, new, e1, negf32, f32, IF_BFP)
+    F(0xb313, LCDBR, RRE, Z, 0, f2, new, f1, negf64, f64, IF_BFP)
+    F(0xb343, LCXBR, RRE, Z, x2h, x2l, new_P, x1, negf128, f128, IF_BFP)
+    F(0xb373, LCDFR, RRE, FPSSH, 0, f2, new, f1, negf64, 0, IF_AFP1 | IF_AFP2)
+/* LOAD COUNT TO BLOCK BOUNDARY */
+    C(0xe727, LCBB, RXE, V, la2, 0, r1, 0, lcbb, 0)
+/* LOAD HALFWORD */
+    C(0xb927, LHR, RRE, EI, 0, r2_16s, 0, r1_32, mov2, 0)
+    C(0xb907, LGHR, RRE, EI, 0, r2_16s, 0, r1, mov2, 0)
+    C(0x4800, LH, RX_a, Z, 0, a2, new, r1_32, ld16s, 0)
+    C(0xe378, LHY, RXY_a, LD, 0, a2, new, r1_32, ld16s, 0)
+    C(0xe315, LGH, RXY_a, Z, 0, a2, r1, 0, ld16s, 0)
+/* LOAD HALFWORD HIGH */
+    C(0xe3c4, LHH, RXY_a, HW, 0, a2, new, r1_32h, ld16s, 0)
+/* LOAD HALFWORD IMMEDIATE */
+    C(0xa708, LHI, RI_a, Z, 0, i2, 0, r1_32, mov2, 0)
+    C(0xa709, LGHI, RI_a, Z, 0, i2, 0, r1, mov2, 0)
+/* LOAD HALFWORD RELATIVE LONG */
+    C(0xc405, LHRL, RIL_b, GIE, 0, ri2, new, r1_32, ld16s, 0)
+    C(0xc404, LGHRL, RIL_b, GIE, 0, ri2, r1, 0, ld16s, 0)
+/* LOAD HIGH */
+    C(0xe3ca, LFH, RXY_a, HW, 0, a2, new, r1_32h, ld32u, 0)
+/* LOAD HIGH AND TRAP */
+    C(0xe3c8, LFHAT, RXY_a, LAT, 0, m2_32u, r1, 0, lfhat, 0)
+/* LOAD LOGICAL */
+    C(0xb916, LLGFR, RRE, Z, 0, r2_32u, 0, r1, mov2, 0)
+    C(0xe316, LLGF, RXY_a, Z, 0, a2, r1, 0, ld32u, 0)
+/* LOAD LOGICAL AND TRAP */
+    C(0xe39d, LLGFAT, RXY_a, LAT, 0, a2, r1, 0, llgfat, 0)
+/* LOAD LOGICAL RELATIVE LONG */
+    C(0xc40e, LLGFRL, RIL_b, GIE, 0, ri2, r1, 0, ld32u, 0)
+/* LOAD LOGICAL CHARACTER */
+    C(0xb994, LLCR, RRE, EI, 0, r2_8u, 0, r1_32, mov2, 0)
+    C(0xb984, LLGCR, RRE, EI, 0, r2_8u, 0, r1, mov2, 0)
+    C(0xe394, LLC, RXY_a, EI, 0, a2, new, r1_32, ld8u, 0)
+    C(0xe390, LLGC, RXY_a, Z, 0, a2, r1, 0, ld8u, 0)
+/* LOAD LOGICAL CHARACTER HIGH */
+    C(0xe3c2, LLCH, RXY_a, HW, 0, a2, new, r1_32h, ld8u, 0)
+/* LOAD LOGICAL HALFWORD */
+    C(0xb995, LLHR, RRE, EI, 0, r2_16u, 0, r1_32, mov2, 0)
+    C(0xb985, LLGHR, RRE, EI, 0, r2_16u, 0, r1, mov2, 0)
+    C(0xe395, LLH, RXY_a, EI, 0, a2, new, r1_32, ld16u, 0)
+    C(0xe391, LLGH, RXY_a, Z, 0, a2, r1, 0, ld16u, 0)
+/* LOAD LOGICAL HALFWORD HIGH */
+    C(0xe3c6, LLHH, RXY_a, HW, 0, a2, new, r1_32h, ld16u, 0)
+/* LOAD LOGICAL HALFWORD RELATIVE LONG */
+    C(0xc402, LLHRL, RIL_b, GIE, 0, ri2, new, r1_32, ld16u, 0)
+    C(0xc406, LLGHRL, RIL_b, GIE, 0, ri2, r1, 0, ld16u, 0)
+/* LOAD LOGICAL IMMEDIATE */
+    D(0xc00e, LLIHF, RIL_a, EI, 0, i2_32u_shl, 0, r1, mov2, 0, 32)
+    D(0xc00f, LLILF, RIL_a, EI, 0, i2_32u_shl, 0, r1, mov2, 0, 0)
+    D(0xa50c, LLIHH, RI_a, Z, 0, i2_16u_shl, 0, r1, mov2, 0, 48)
+    D(0xa50d, LLIHL, RI_a, Z, 0, i2_16u_shl, 0, r1, mov2, 0, 32)
+    D(0xa50e, LLILH, RI_a, Z, 0, i2_16u_shl, 0, r1, mov2, 0, 16)
+    D(0xa50f, LLILL, RI_a, Z, 0, i2_16u_shl, 0, r1, mov2, 0, 0)
+/* LOAD LOGICAL THIRTY ONE BITS */
+    C(0xb917, LLGTR, RRE, Z, 0, r2_o, r1, 0, llgt, 0)
+    C(0xe317, LLGT, RXY_a, Z, 0, m2_32u, r1, 0, llgt, 0)
+/* LOAD LOGICAL THIRTY ONE BITS AND TRAP */
+    C(0xe39c, LLGTAT, RXY_a, LAT, 0, m2_32u, r1, 0, llgtat, 0)
+
+/* LOAD FPR FROM GR */
+    F(0xb3c1, LDGR, RRE, FPRGR, 0, r2_o, 0, f1, mov2, 0, IF_AFP1)
+/* LOAD GR FROM FPR */
+    F(0xb3cd, LGDR, RRE, FPRGR, 0, f2, 0, r1, mov2, 0, IF_AFP2)
+/* LOAD NEGATIVE */
+    C(0x1100, LNR, RR_a, Z, 0, r2_32s, new, r1_32, nabs, nabs32)
+    C(0xb901, LNGR, RRE, Z, 0, r2, r1, 0, nabs, nabs64)
+    C(0xb911, LNGFR, RRE, Z, 0, r2_32s, r1, 0, nabs, nabs64)
+    F(0xb301, LNEBR, RRE, Z, 0, e2, new, e1, nabsf32, f32, IF_BFP)
+    F(0xb311, LNDBR, RRE, Z, 0, f2, new, f1, nabsf64, f64, IF_BFP)
+    F(0xb341, LNXBR, RRE, Z, x2h, x2l, new_P, x1, nabsf128, f128, IF_BFP)
+    F(0xb371, LNDFR, RRE, FPSSH, 0, f2, new, f1, nabsf64, 0, IF_AFP1 | IF_AFP2)
+/* LOAD ON CONDITION */
+    C(0xb9f2, LOCR, RRF_c, LOC, r1, r2, new, r1_32, loc, 0)
+    C(0xb9e2, LOCGR, RRF_c, LOC, r1, r2, r1, 0, loc, 0)
+    C(0xebf2, LOC, RSY_b, LOC, r1, m2_32u, new, r1_32, loc, 0)
+    C(0xebe2, LOCG, RSY_b, LOC, r1, m2_64, r1, 0, loc, 0)
+/* LOAD HALFWORD IMMEDIATE ON CONDITION */
+    C(0xec42, LOCHI, RIE_g, LOC2, r1, i2, new, r1_32, loc, 0)
+    C(0xec46, LOCGHI, RIE_g, LOC2, r1, i2, r1, 0, loc, 0)
+    C(0xec4e, LOCHHI, RIE_g, LOC2, r1_sr32, i2, new, r1_32h, loc, 0)
+/* LOAD HIGH ON CONDITION */
+    C(0xb9e0, LOCFHR, RRF_c, LOC2, r1_sr32, r2, new, r1_32h, loc, 0)
+    C(0xebe0, LOCFH, RSY_b, LOC2, r1_sr32, m2_32u, new, r1_32h, loc, 0)
+/* LOAD PAIR DISJOINT */
+    D(0xc804, LPD, SSF, ILA, 0, 0, new_P, r3_P32, lpd, 0, MO_TEUL)
+    D(0xc805, LPDG, SSF, ILA, 0, 0, new_P, r3_P64, lpd, 0, MO_TEQ)
+/* LOAD PAIR FROM QUADWORD */
+    C(0xe38f, LPQ, RXY_a, Z, 0, a2, r1_P, 0, lpq, 0)
+/* LOAD POSITIVE */
+    C(0x1000, LPR, RR_a, Z, 0, r2_32s, new, r1_32, abs, abs32)
+    C(0xb900, LPGR, RRE, Z, 0, r2, r1, 0, abs, abs64)
+    C(0xb910, LPGFR, RRE, Z, 0, r2_32s, r1, 0, abs, abs64)
+    F(0xb300, LPEBR, RRE, Z, 0, e2, new, e1, absf32, f32, IF_BFP)
+    F(0xb310, LPDBR, RRE, Z, 0, f2, new, f1, absf64, f64, IF_BFP)
+    F(0xb340, LPXBR, RRE, Z, x2h, x2l, new_P, x1, absf128, f128, IF_BFP)
+    F(0xb370, LPDFR, RRE, FPSSH, 0, f2, new, f1, absf64, 0, IF_AFP1 | IF_AFP2)
+/* LOAD REVERSED */
+    C(0xb91f, LRVR, RRE, Z, 0, r2_32u, new, r1_32, rev32, 0)
+    C(0xb90f, LRVGR, RRE, Z, 0, r2_o, r1, 0, rev64, 0)
+    C(0xe31f, LRVH, RXY_a, Z, 0, m2_16u, new, r1_16, rev16, 0)
+    C(0xe31e, LRV, RXY_a, Z, 0, m2_32u, new, r1_32, rev32, 0)
+    C(0xe30f, LRVG, RXY_a, Z, 0, m2_64, r1, 0, rev64, 0)
+/* LOAD ZERO */
+    F(0xb374, LZER, RRE, Z, 0, 0, 0, e1, zero, 0, IF_AFP1)
+    F(0xb375, LZDR, RRE, Z, 0, 0, 0, f1, zero, 0, IF_AFP1)
+    F(0xb376, LZXR, RRE, Z, 0, 0, 0, x1, zero2, 0, IF_AFP1)
+
+/* LOAD FPC */
+    F(0xb29d, LFPC, S, Z, 0, m2_32u, 0, 0, sfpc, 0, IF_BFP)
+/* LOAD FPC AND SIGNAL */
+    F(0xb2bd, LFAS, S, IEEEE_SIM, 0, m2_32u, 0, 0, sfas, 0, IF_DFP)
+/* LOAD FP INTEGER */
+    F(0xb357, FIEBR, RRF_e, Z, 0, e2, new, e1, fieb, 0, IF_BFP)
+    F(0xb35f, FIDBR, RRF_e, Z, 0, f2, new, f1, fidb, 0, IF_BFP)
+    F(0xb347, FIXBR, RRF_e, Z, x2h, x2l, new_P, x1, fixb, 0, IF_BFP)
+
+/* LOAD LENGTHENED */
+    F(0xb304, LDEBR, RRE, Z, 0, e2, new, f1, ldeb, 0, IF_BFP)
+    F(0xb305, LXDBR, RRE, Z, 0, f2, new_P, x1, lxdb, 0, IF_BFP)
+    F(0xb306, LXEBR, RRE, Z, 0, e2, new_P, x1, lxeb, 0, IF_BFP)
+    F(0xed04, LDEB, RXE, Z, 0, m2_32u, new, f1, ldeb, 0, IF_BFP)
+    F(0xed05, LXDB, RXE, Z, 0, m2_64, new_P, x1, lxdb, 0, IF_BFP)
+    F(0xed06, LXEB, RXE, Z, 0, m2_32u, new_P, x1, lxeb, 0, IF_BFP)
+    F(0xb324, LDER, RXE, Z, 0, e2, new, f1, lde, 0, IF_AFP1)
+    F(0xed24, LDE, RXE, Z, 0, m2_32u, new, f1, lde, 0, IF_AFP1)
+/* LOAD ROUNDED */
+    F(0xb344, LEDBR, RRF_e, Z, 0, f2, new, e1, ledb, 0, IF_BFP)
+    F(0xb345, LDXBR, RRF_e, Z, x2h, x2l, new, f1, ldxb, 0, IF_BFP)
+    F(0xb346, LEXBR, RRF_e, Z, x2h, x2l, new, e1, lexb, 0, IF_BFP)
+
+/* LOAD MULTIPLE */
+    C(0x9800, LM, RS_a, Z, 0, a2, 0, 0, lm32, 0)
+    C(0xeb98, LMY, RSY_a, LD, 0, a2, 0, 0, lm32, 0)
+    C(0xeb04, LMG, RSY_a, Z, 0, a2, 0, 0, lm64, 0)
+/* LOAD MULTIPLE HIGH */
+    C(0xeb96, LMH, RSY_a, Z, 0, a2, 0, 0, lmh, 0)
+/* LOAD ACCESS MULTIPLE */
+    C(0x9a00, LAM, RS_a, Z, 0, a2, 0, 0, lam, 0)
+    C(0xeb9a, LAMY, RSY_a, LD, 0, a2, 0, 0, lam, 0)
+
+/* MONITOR CALL */
+    C(0xaf00, MC, SI, Z, la1, 0, 0, 0, mc, 0)
+
+/* MOVE */
+    C(0xd200, MVC, SS_a, Z, la1, a2, 0, 0, mvc, 0)
+    C(0xe544, MVHHI, SIL, GIE, la1, i2, 0, m1_16, mov2, 0)
+    C(0xe54c, MVHI, SIL, GIE, la1, i2, 0, m1_32, mov2, 0)
+    C(0xe548, MVGHI, SIL, GIE, la1, i2, 0, m1_64, mov2, 0)
+    C(0x9200, MVI, SI, Z, la1, i2, 0, m1_8, mov2, 0)
+    C(0xeb52, MVIY, SIY, LD, la1, i2, 0, m1_8, mov2, 0)
+/* MOVE INVERSE */
+    C(0xe800, MVCIN, SS_a, Z, la1, a2, 0, 0, mvcin, 0)
+/* MOVE LONG */
+    C(0x0e00, MVCL, RR_a, Z, 0, 0, 0, 0, mvcl, 0)
+/* MOVE LONG EXTENDED */
+    C(0xa800, MVCLE, RS_a, Z, 0, a2, 0, 0, mvcle, 0)
+/* MOVE LONG UNICODE */
+    C(0xeb8e, MVCLU, RSY_a, E2, 0, a2, 0, 0, mvclu, 0)
+/* MOVE NUMERICS */
+    C(0xd100, MVN, SS_a, Z, la1, a2, 0, 0, mvn, 0)
+/* MOVE PAGE */
+    C(0xb254, MVPG, RRE, Z, 0, 0, 0, 0, mvpg, 0)
+/* MOVE STRING */
+    C(0xb255, MVST, RRE, Z, 0, 0, 0, 0, mvst, 0)
+/* MOVE WITH OPTIONAL SPECIFICATION */
+    C(0xc800, MVCOS, SSF, MVCOS, la1, a2, 0, 0, mvcos, 0)
+/* MOVE WITH OFFSET */
+    /* Really format SS_b, but we pack both lengths into one argument
+       for the helper call, so we might as well leave one 8-bit field. */
+    C(0xf100, MVO, SS_a, Z, la1, a2, 0, 0, mvo, 0)
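+
+/*
+ * Illustrative note on the packing mentioned above: SS_b carries two
+ * 4-bit lengths, L1 in instruction bits 8-11 and L2 in bits 12-15,
+ * which together occupy exactly the 8-bit L field of SS_a.  Decoding
+ * as SS_a therefore hands the helper a single byte from which it can
+ * recover l1 = (l >> 4) and l2 = (l & 0xf).
+ */
+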
+/* MOVE ZONES */
+    C(0xd300, MVZ, SS_a, Z, la1, a2, 0, 0, mvz, 0)
+
+/* MULTIPLY */
+    C(0x1c00, MR, RR_a, Z, r1p1_32s, r2_32s, new, r1_D32, mul, 0)
+    C(0xb9ec, MGRK, RRF_a, MIE2,r3_o, r2_o, r1_P, 0, muls128, 0)
+    C(0x5c00, M, RX_a, Z, r1p1_32s, m2_32s, new, r1_D32, mul, 0)
+    C(0xe35c, MFY, RXY_a, GIE, r1p1_32s, m2_32s, new, r1_D32, mul, 0)
+    C(0xe384, MG, RXY_a, MIE2,r1p1_o, m2_64, r1_P, 0, muls128, 0)
+    F(0xb317, MEEBR, RRE, Z, e1, e2, new, e1, meeb, 0, IF_BFP)
+    F(0xb31c, MDBR, RRE, Z, f1, f2, new, f1, mdb, 0, IF_BFP)
+    F(0xb34c, MXBR, RRE, Z, x2h, x2l, x1, x1, mxb, 0, IF_BFP)
+    F(0xb30c, MDEBR, RRE, Z, f1, e2, new, f1, mdeb, 0, IF_BFP)
+    F(0xb307, MXDBR, RRE, Z, 0, f2, x1, x1, mxdb, 0, IF_BFP)
+    F(0xed17, MEEB, RXE, Z, e1, m2_32u, new, e1, meeb, 0, IF_BFP)
+    F(0xed1c, MDB, RXE, Z, f1, m2_64, new, f1, mdb, 0, IF_BFP)
+    F(0xed0c, MDEB, RXE, Z, f1, m2_32u, new, f1, mdeb, 0, IF_BFP)
+    F(0xed07, MXDB, RXE, Z, 0, m2_64, x1, x1, mxdb, 0, IF_BFP)
+/* MULTIPLY HALFWORD */
+    C(0x4c00, MH, RX_a, Z, r1_o, m2_16s, new, r1_32, mul, 0)
+    C(0xe37c, MHY, RXY_a, GIE, r1_o, m2_16s, new, r1_32, mul, 0)
+    C(0xe33c, MGH, RXY_a, MIE2,r1_o, m2_16s, r1, 0, mul, 0)
+/* MULTIPLY HALFWORD IMMEDIATE */
+    C(0xa70c, MHI, RI_a, Z, r1_o, i2, new, r1_32, mul, 0)
+    C(0xa70d, MGHI, RI_a, Z, r1_o, i2, r1, 0, mul, 0)
+/* MULTIPLY LOGICAL */
+    C(0xb996, MLR, RRE, Z, r1p1_32u, r2_32u, new, r1_D32, mul, 0)
+    C(0xe396, ML, RXY_a, Z, r1p1_32u, m2_32u, new, r1_D32, mul, 0)
+    C(0xb986, MLGR, RRE, Z, r1p1, r2_o, r1_P, 0, mul128, 0)
+    C(0xe386, MLG, RXY_a, Z, r1p1, m2_64, r1_P, 0, mul128, 0)
+/* MULTIPLY SINGLE */
+    C(0xb252, MSR, RRE, Z, r1_o, r2_o, new, r1_32, mul, 0)
+    C(0xb9fd, MSRKC, RRF_a, MIE2,r3_32s, r2_32s, new, r1_32, mul, muls32)
+    C(0x7100, MS, RX_a, Z, r1_o, m2_32s, new, r1_32, mul, 0)
+    C(0xe351, MSY, RXY_a, LD, r1_o, m2_32s, new, r1_32, mul, 0)
+    C(0xe353, MSC, RXY_a, MIE2,r1_32s, m2_32s, new, r1_32, mul, muls32)
+    C(0xb90c, MSGR, RRE, Z, r1_o, r2_o, r1, 0, mul, 0)
+    C(0xb9ed, MSGRKC, RRF_a, MIE2,r3_o, r2_o, new_P, out2_r1, muls128, muls64)
+    C(0xb91c, MSGFR, RRE, Z, r1_o, r2_32s, r1, 0, mul, 0)
+    C(0xe30c, MSG, RXY_a, Z, r1_o, m2_64, r1, 0, mul, 0)
+    C(0xe383, MSGC, RXY_a, MIE2,r1_o, m2_64, new_P, out2_r1, muls128, muls64)
+    C(0xe31c, MSGF, RXY_a, Z, r1_o, m2_32s, r1, 0, mul, 0)
+/* MULTIPLY SINGLE IMMEDIATE */
+    C(0xc201, MSFI, RIL_a, GIE, r1_o, i2, new, r1_32, mul, 0)
+    C(0xc200, MSGFI, RIL_a, GIE, r1_o, i2, r1, 0, mul, 0)
+
+/* MULTIPLY AND ADD */
+    F(0xb30e, MAEBR, RRD, Z, e1, e2, new, e1, maeb, 0, IF_BFP)
+    F(0xb31e, MADBR, RRD, Z, f1, f2, new, f1, madb, 0, IF_BFP)
+    F(0xed0e, MAEB, RXF, Z, e1, m2_32u, new, e1, maeb, 0, IF_BFP)
+    F(0xed1e, MADB, RXF, Z, f1, m2_64, new, f1, madb, 0, IF_BFP)
+/* MULTIPLY AND SUBTRACT */
+    F(0xb30f, MSEBR, RRD, Z, e1, e2, new, e1, mseb, 0, IF_BFP)
+    F(0xb31f, MSDBR, RRD, Z, f1, f2, new, f1, msdb, 0, IF_BFP)
+    F(0xed0f, MSEB, RXF, Z, e1, m2_32u, new, e1, mseb, 0, IF_BFP)
+    F(0xed1f, MSDB, RXF, Z, f1, m2_64, new, f1, msdb, 0, IF_BFP)
+
+/* OR */
+    C(0x1600, OR, RR_a, Z, r1, r2, new, r1_32, or, nz32)
+    C(0xb9f6, ORK, RRF_a, DO, r2, r3, new, r1_32, or, nz32)
+    C(0x5600, O, RX_a, Z, r1, m2_32s, new, r1_32, or, nz32)
+    C(0xe356, OY, RXY_a, LD, r1, m2_32s, new, r1_32, or, nz32)
+    C(0xb981, OGR, RRE, Z, r1, r2, r1, 0, or, nz64)
+    C(0xb9e6, OGRK, RRF_a, DO, r2, r3, r1, 0, or, nz64)
+    C(0xe381, OG, RXY_a, Z, r1, m2_64, r1, 0, or, nz64)
+    C(0xd600, OC, SS_a, Z, la1, a2, 0, 0, oc, 0)
+/* OR IMMEDIATE */
+    D(0xc00c, OIHF, RIL_a, EI, r1_o, i2_32u, r1, 0, ori, 0, 0x2020)
+    D(0xc00d, OILF, RIL_a, EI, r1_o, i2_32u, r1, 0, ori, 0, 0x2000)
+    D(0xa508, OIHH, RI_a, Z, r1_o, i2_16u, r1, 0, ori, 0, 0x1030)
+    D(0xa509, OIHL, RI_a, Z, r1_o, i2_16u, r1, 0, ori, 0, 0x1020)
+    D(0xa50a, OILH, RI_a, Z, r1_o, i2_16u, r1, 0, ori, 0, 0x1010)
+    D(0xa50b, OILL, RI_a, Z, r1_o, i2_16u, r1, 0, ori, 0, 0x1000)
+    D(0x9600, OI, SI, Z, la1, i2_8u, new, 0, oi, nz64, MO_UB)
+    D(0xeb56, OIY, SIY, LD, la1, i2_8u, new, 0, oi, nz64, MO_UB)
+
+/* PACK */
+    /* Really format SS_b, but we pack both lengths into one argument
+       for the helper call, so we might as well leave one 8-bit field. */
+    C(0xf200, PACK, SS_a, Z, la1, a2, 0, 0, pack, 0)
+/* PACK ASCII */
+    C(0xe900, PKA, SS_f, E2, la1, a2, 0, 0, pka, 0)
+/* PACK UNICODE */
+    C(0xe100, PKU, SS_f, E2, la1, a2, 0, 0, pku, 0)
+
+/* PREFETCH */
+    /* Implemented as nops of course. */
+    C(0xe336, PFD, RXY_b, GIE, 0, 0, 0, 0, 0, 0)
+    C(0xc602, PFDRL, RIL_c, GIE, 0, 0, 0, 0, 0, 0)
+/* PERFORM PROCESSOR ASSIST */
+    /* Implemented as nop of course. */
+    C(0xb2e8, PPA, RRF_c, PPA, 0, 0, 0, 0, 0, 0)
+
+/* POPULATION COUNT */
+    C(0xb9e1, POPCNT, RRE, PC, 0, r2_o, r1, 0, popcnt, nz64)
+
+/* ROTATE LEFT SINGLE LOGICAL */
+    C(0xeb1d, RLL, RSY_a, Z, r3_o, sh32, new, r1_32, rll32, 0)
+    C(0xeb1c, RLLG, RSY_a, Z, r3_o, sh64, r1, 0, rll64, 0)
+
+/* ROTATE THEN INSERT SELECTED BITS */
+    C(0xec55, RISBG, RIE_f, GIE, 0, r2, r1, 0, risbg, s64)
+    C(0xec59, RISBGN, RIE_f, MIE, 0, r2, r1, 0, risbg, 0)
+    C(0xec5d, RISBHG, RIE_f, HW, 0, r2, r1, 0, risbg, 0)
+    C(0xec51, RISBLG, RIE_f, HW, 0, r2, r1, 0, risbg, 0)
+/* ROTATE THEN <OP> SELECTED BITS */
+    C(0xec54, RNSBG, RIE_f, GIE, 0, r2, r1, 0, rosbg, 0)
+    C(0xec56, ROSBG, RIE_f, GIE, 0, r2, r1, 0, rosbg, 0)
+    C(0xec57, RXSBG, RIE_f, GIE, 0, r2, r1, 0, rosbg, 0)
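+
+/*
+ * Rough orientation for the RISBG family (per the z/Architecture
+ * definition, not spelled out by the table): the second operand is
+ * first rotated left by the I5 amount, then bit positions I3 through
+ * I4 of the rotated value are inserted into r1; if the zero flag
+ * (0x80 in the I4 field) is set, the r1 bits outside the selected
+ * range are zeroed rather than preserved.
+ */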
+
+/* SEARCH STRING */
+    C(0xb25e, SRST, RRE, Z, 0, 0, 0, 0, srst, 0)
+/* SEARCH STRING UNICODE */
+    C(0xb9be, SRSTU, RRE, ETF3, 0, 0, 0, 0, srstu, 0)
+
+/* SET ACCESS */
+    C(0xb24e, SAR, RRE, Z, 0, r2_o, 0, 0, sar, 0)
+/* SET ADDRESSING MODE */
+    D(0x010c, SAM24, E, Z, 0, 0, 0, 0, sam, 0, 0)
+    D(0x010d, SAM31, E, Z, 0, 0, 0, 0, sam, 0, 1)
+    D(0x010e, SAM64, E, Z, 0, 0, 0, 0, sam, 0, 3)
+/* SET FPC */
+    F(0xb384, SFPC, RRE, Z, 0, r1_o, 0, 0, sfpc, 0, IF_BFP)
+/* SET FPC AND SIGNAL */
+    F(0xb385, SFASR, RRE, IEEEE_SIM, 0, r1_o, 0, 0, sfas, 0, IF_DFP)
+/* SET BFP ROUNDING MODE */
+    F(0xb299, SRNM, S, Z, la2, 0, 0, 0, srnm, 0, IF_BFP)
+    F(0xb2b8, SRNMB, S, FPE, la2, 0, 0, 0, srnmb, 0, IF_BFP)
+/* SET DFP ROUNDING MODE */
+    F(0xb2b9, SRNMT, S, DFPR, la2, 0, 0, 0, srnmt, 0, IF_DFP)
+/* SET PROGRAM MASK */
+    C(0x0400, SPM, RR_a, Z, r1, 0, 0, 0, spm, 0)
+
+/* SHIFT LEFT SINGLE */
+    D(0x8b00, SLA, RS_a, Z, r1, sh32, new, r1_32, sla, 0, 31)
+    D(0xebdd, SLAK, RSY_a, DO, r3, sh32, new, r1_32, sla, 0, 31)
+    D(0xeb0b, SLAG, RSY_a, Z, r3, sh64, r1, 0, sla, 0, 63)
+/* SHIFT LEFT SINGLE LOGICAL */
+    C(0x8900, SLL, RS_a, Z, r1_o, sh32, new, r1_32, sll, 0)
+    C(0xebdf, SLLK, RSY_a, DO, r3_o, sh32, new, r1_32, sll, 0)
+    C(0xeb0d, SLLG, RSY_a, Z, r3_o, sh64, r1, 0, sll, 0)
+/* SHIFT RIGHT SINGLE */
+    C(0x8a00, SRA, RS_a, Z, r1_32s, sh32, new, r1_32, sra, s32)
+    C(0xebdc, SRAK, RSY_a, DO, r3_32s, sh32, new, r1_32, sra, s32)
+    C(0xeb0a, SRAG, RSY_a, Z, r3_o, sh64, r1, 0, sra, s64)
+/* SHIFT RIGHT SINGLE LOGICAL */
+    C(0x8800, SRL, RS_a, Z, r1_32u, sh32, new, r1_32, srl, 0)
+    C(0xebde, SRLK, RSY_a, DO, r3_32u, sh32, new, r1_32, srl, 0)
+    C(0xeb0c, SRLG, RSY_a, Z, r3_o, sh64, r1, 0, srl, 0)
+/* SHIFT LEFT DOUBLE */
+    D(0x8f00, SLDA, RS_a, Z, r1_D32, sh64, new, r1_D32, sla, 0, 31)
+/* SHIFT LEFT DOUBLE LOGICAL */
+    C(0x8d00, SLDL, RS_a, Z, r1_D32, sh64, new, r1_D32, sll, 0)
+/* SHIFT RIGHT DOUBLE */
+    C(0x8e00, SRDA, RS_a, Z, r1_D32, sh64, new, r1_D32, sra, s64)
+/* SHIFT RIGHT DOUBLE LOGICAL */
+    C(0x8c00, SRDL, RS_a, Z, r1_D32, sh64, new, r1_D32, srl, 0)
+
+/* SQUARE ROOT */
+    F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP)
+    F(0xb315, SQDBR, RRE, Z, 0, f2, new, f1, sqdb, 0, IF_BFP)
+    F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_P, x1, sqxb, 0, IF_BFP)
+    F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP)
+    F(0xed15, SQDB, RXE, Z, 0, m2_64, new, f1, sqdb, 0, IF_BFP)
+
+/* STORE */
+    C(0x5000, ST, RX_a, Z, r1_o, a2, 0, 0, st32, 0)
+    C(0xe350, STY, RXY_a, LD, r1_o, a2, 0, 0, st32, 0)
+    C(0xe324, STG, RXY_a, Z, r1_o, a2, 0, 0, st64, 0)
+    F(0x6000, STD, RX_a, Z, f1, a2, 0, 0, st64, 0, IF_AFP1)
+    F(0xed67, STDY, RXY_a, LD, f1, a2, 0, 0, st64, 0, IF_AFP1)
+    F(0x7000, STE, RX_a, Z, e1, a2, 0, 0, st32, 0, IF_AFP1)
+    F(0xed66, STEY, RXY_a, LD, e1, a2, 0, 0, st32, 0, IF_AFP1)
+/* STORE RELATIVE LONG */
+    C(0xc40f, STRL, RIL_b, GIE, r1_o, ri2, 0, 0, st32, 0)
+    C(0xc40b, STGRL, RIL_b, GIE, r1_o, ri2, 0, 0, st64, 0)
+/* STORE CHARACTER */
+    C(0x4200, STC, RX_a, Z, r1_o, a2, 0, 0, st8, 0)
+    C(0xe372, STCY, RXY_a, LD, r1_o, a2, 0, 0, st8, 0)
+/* STORE CHARACTER HIGH */
+    C(0xe3c3, STCH, RXY_a, HW, r1_sr32, a2, 0, 0, st8, 0)
+/* STORE CHARACTERS UNDER MASK */
+    D(0xbe00, STCM, RS_b, Z, r1_o, a2, 0, 0, stcm, 0, 0)
+    D(0xeb2d, STCMY, RSY_b, LD, r1_o, a2, 0, 0, stcm, 0, 0)
+    D(0xeb2c, STCMH, RSY_b, Z, r1_o, a2, 0, 0, stcm, 0, 32)
+/* STORE HALFWORD */
+    C(0x4000, STH, RX_a, Z, r1_o, a2, 0, 0, st16, 0)
+    C(0xe370, STHY, RXY_a, LD, r1_o, a2, 0, 0, st16, 0)
+/* STORE HALFWORD HIGH */
+    C(0xe3c7, STHH, RXY_a, HW, r1_sr32, a2, 0, 0, st16, 0)
+/* STORE HALFWORD RELATIVE LONG */
+    C(0xc407, STHRL, RIL_b, GIE, r1_o, ri2, 0, 0, st16, 0)
+/* STORE HIGH */
+    C(0xe3cb, STFH, RXY_a, HW, r1_sr32, a2, 0, 0, st32, 0)
+/* STORE ON CONDITION */
+    D(0xebf3, STOC, RSY_b, LOC, 0, 0, 0, 0, soc, 0, 0)
+    D(0xebe3, STOCG, RSY_b, LOC, 0, 0, 0, 0, soc, 0, 1)
+/* STORE HIGH ON CONDITION */
+    D(0xebe1, STOCFH, RSY_b, LOC2, 0, 0, 0, 0, soc, 0, 2)
+/* STORE REVERSED */
+    C(0xe33f, STRVH, RXY_a, Z, la2, r1_16u, new, m1_16, rev16, 0)
+    C(0xe33e, STRV, RXY_a, Z, la2, r1_32u, new, m1_32, rev32, 0)
+    C(0xe32f, STRVG, RXY_a, Z, la2, r1_o, new, m1_64, rev64, 0)
+
+/* STORE CLOCK */
+    F(0xb205, STCK, S, Z, la2, 0, new, m1_64, stck, 0, IF_IO)
+    F(0xb27c, STCKF, S, SCF, la2, 0, new, m1_64, stck, 0, IF_IO)
+/* STORE CLOCK EXTENDED */
+    F(0xb278, STCKE, S, Z, 0, a2, 0, 0, stcke, 0, IF_IO)
+
+/* STORE FACILITY LIST EXTENDED */
+    C(0xb2b0, STFLE, S, SFLE, 0, a2, 0, 0, stfle, 0)
+/* STORE FPC */
+    F(0xb29c, STFPC, S, Z, 0, a2, new, m2_32, efpc, 0, IF_BFP)
+
+/* STORE MULTIPLE */
+    D(0x9000, STM, RS_a, Z, 0, a2, 0, 0, stm, 0, 4)
+    D(0xeb90, STMY, RSY_a, LD, 0, a2, 0, 0, stm, 0, 4)
+    D(0xeb24, STMG, RSY_a, Z, 0, a2, 0, 0, stm, 0, 8)
+/* STORE MULTIPLE HIGH */
+    C(0xeb26, STMH, RSY_a, Z, 0, a2, 0, 0, stmh, 0)
+/* STORE ACCESS MULTIPLE */
+    C(0x9b00, STAM, RS_a, Z, 0, a2, 0, 0, stam, 0)
+    C(0xeb9b, STAMY, RSY_a, LD, 0, a2, 0, 0, stam, 0)
+/* STORE PAIR TO QUADWORD */
+    C(0xe38e, STPQ, RXY_a, Z, 0, a2, r1_P, 0, stpq, 0)
+
+/* SUBTRACT */
+    C(0x1b00, SR, RR_a, Z, r1, r2, new, r1_32, sub, subs32)
+    C(0xb9f9, SRK, RRF_a, DO, r2, r3, new, r1_32, sub, subs32)
+    C(0x5b00, S, RX_a, Z, r1, m2_32s, new, r1_32, sub, subs32)
+    C(0xe35b, SY, RXY_a, LD, r1, m2_32s, new, r1_32, sub, subs32)
+    C(0xb909, SGR, RRE, Z, r1, r2, r1, 0, sub, subs64)
+    C(0xb919, SGFR, RRE, Z, r1, r2_32s, r1, 0, sub, subs64)
+    C(0xb9e9, SGRK, RRF_a, DO, r2, r3, r1, 0, sub, subs64)
+    C(0xe309, SG, RXY_a, Z, r1, m2_64, r1, 0, sub, subs64)
+    C(0xe319, SGF, RXY_a, Z, r1, m2_32s, r1, 0, sub, subs64)
+    F(0xb30b, SEBR, RRE, Z, e1, e2, new, e1, seb, f32, IF_BFP)
+    F(0xb31b, SDBR, RRE, Z, f1, f2, new, f1, sdb, f64, IF_BFP)
+    F(0xb34b, SXBR, RRE, Z, x2h, x2l, x1, x1, sxb, f128, IF_BFP)
+    F(0xed0b, SEB, RXE, Z, e1, m2_32u, new, e1, seb, f32, IF_BFP)
+    F(0xed1b, SDB, RXE, Z, f1, m2_64, new, f1, sdb, f64, IF_BFP)
+/* SUBTRACT HALFWORD */
+    C(0x4b00, SH, RX_a, Z, r1, m2_16s, new, r1_32, sub, subs32)
+    C(0xe37b, SHY, RXY_a, LD, r1, m2_16s, new, r1_32, sub, subs32)
+    C(0xe339, SGH, RXY_a, MIE2,r1, m2_16s, r1, 0, sub, subs64)
+/* SUBTRACT HIGH */
+    C(0xb9c9, SHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, sub, subs32)
+    C(0xb9d9, SHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, sub, subs32)
+/* SUBTRACT LOGICAL */
+    C(0x1f00, SLR, RR_a, Z, r1_32u, r2_32u, new, r1_32, sub, subu32)
+    C(0xb9fb, SLRK, RRF_a, DO, r2_32u, r3_32u, new, r1_32, sub, subu32)
+    C(0x5f00, SL, RX_a, Z, r1_32u, m2_32u, new, r1_32, sub, subu32)
+    C(0xe35f, SLY, RXY_a, LD, r1_32u, m2_32u, new, r1_32, sub, subu32)
+    C(0xb90b, SLGR, RRE, Z, r1, r2, r1, 0, subu64, subu64)
+    C(0xb91b, SLGFR, RRE, Z, r1, r2_32u, r1, 0, subu64, subu64)
+    C(0xb9eb, SLGRK, RRF_a, DO, r2, r3, r1, 0, subu64, subu64)
+    C(0xe30b, SLG, RXY_a, Z, r1, m2_64, r1, 0, subu64, subu64)
+    C(0xe31b, SLGF, RXY_a, Z, r1, m2_32u, r1, 0, subu64, subu64)
+/* SUBTRACT LOGICAL HIGH */
+    C(0xb9cb, SLHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, sub, subu32)
+    C(0xb9db, SLHHLR, RRF_a, HW, r2_sr32, r3_32u, new, r1_32h, sub, subu32)
+/* SUBTRACT LOGICAL IMMEDIATE */
+    C(0xc205, SLFI, RIL_a, EI, r1_32u, i2_32u, new, r1_32, sub, subu32)
+    C(0xc204, SLGFI, RIL_a, EI, r1, i2_32u, r1, 0, subu64, subu64)
+/* SUBTRACT LOGICAL WITH BORROW */
+    C(0xb999, SLBR, RRE, Z, r1_32u, r2_32u, new, r1_32, subb32, subu32)
+    C(0xb989, SLBGR, RRE, Z, r1, r2, r1, 0, subb64, subu64)
+    C(0xe399, SLB, RXY_a, Z, r1_32u, m2_32u, new, r1_32, subb32, subu32)
+    C(0xe389, SLBG, RXY_a, Z, r1, m2_64, r1, 0, subb64, subu64)
+
+/* SUPERVISOR CALL */
+    C(0x0a00, SVC, I, Z, 0, 0, 0, 0, svc, 0)
+
+/* TEST ADDRESSING MODE */
+    C(0x010b, TAM, E, Z, 0, 0, 0, 0, tam, 0)
+
+/* TEST AND SET */
+    C(0x9300, TS, S, Z, 0, a2, 0, 0, ts, 0)
+
+/* TEST DATA CLASS */
+    F(0xed10, TCEB, RXE, Z, e1, a2, 0, 0, tceb, 0, IF_BFP)
+    F(0xed11, TCDB, RXE, Z, f1, a2, 0, 0, tcdb, 0, IF_BFP)
+    F(0xed12, TCXB, RXE, Z, 0, a2, x1, 0, tcxb, 0, IF_BFP)
+
+/* TEST DECIMAL */
+    C(0xebc0, TP, RSL, E2, la1, 0, 0, 0, tp, 0)
+
+/* TEST UNDER MASK */
+    C(0x9100, TM, SI, Z, m1_8u, i2_8u, 0, 0, 0, tm32)
+    C(0xeb51, TMY, SIY, LD, m1_8u, i2_8u, 0, 0, 0, tm32)
+    D(0xa702, TMHH, RI_a, Z, r1_o, i2_16u_shl, 0, 0, 0, tm64, 48)
+    D(0xa703, TMHL, RI_a, Z, r1_o, i2_16u_shl, 0, 0, 0, tm64, 32)
+    D(0xa700, TMLH, RI_a, Z, r1_o, i2_16u_shl, 0, 0, 0, tm64, 16)
+    D(0xa701, TMLL, RI_a, Z, r1_o, i2_16u_shl, 0, 0, 0, tm64, 0)
+
+/* TRANSLATE */
+    C(0xdc00, TR, SS_a, Z, la1, a2, 0, 0, tr, 0)
+/* TRANSLATE AND TEST */
+    C(0xdd00, TRT, SS_a, Z, la1, a2, 0, 0, trt, 0)
+/* TRANSLATE AND TEST REVERSE */
+    C(0xd000, TRTR, SS_a, ETF3, la1, a2, 0, 0, trtr, 0)
+/* TRANSLATE EXTENDED */
+    C(0xb2a5, TRE, RRE, Z, 0, r2, r1_P, 0, tre, 0)
+
+/* TRANSLATE ONE TO ONE */
+    C(0xb993, TROO, RRF_c, E2, 0, 0, 0, 0, trXX, 0)
+/* TRANSLATE ONE TO TWO */
+    C(0xb992, TROT, RRF_c, E2, 0, 0, 0, 0, trXX, 0)
+/* TRANSLATE TWO TO ONE */
+    C(0xb991, TRTO, RRF_c, E2, 0, 0, 0, 0, trXX, 0)
+/* TRANSLATE TWO TO TWO */
+    C(0xb990, TRTT, RRF_c, E2, 0, 0, 0, 0, trXX, 0)
+
+/* UNPACK */
+    /* Really format SS_b, but we pack both lengths into one argument
+       for the helper call, so we might as well leave one 8-bit field. */
+    C(0xf300, UNPK, SS_a, Z, la1, a2, 0, 0, unpk, 0)
+/* UNPACK ASCII */
+    C(0xea00, UNPKA, SS_a, E2, la1, a2, 0, 0, unpka, 0)
+/* UNPACK UNICODE */
+    C(0xe200, UNPKU, SS_a, E2, la1, a2, 0, 0, unpku, 0)
+
+/* MSA Instructions */
+    D(0xb91e, KMAC, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KMAC)
+    D(0xb928, PCKMO, RRE, MSA3, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_PCKMO)
+    D(0xb92a, KMF, RRE, MSA4, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KMF)
+    D(0xb92b, KMO, RRE, MSA4, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KMO)
+    D(0xb92c, PCC, RRE, MSA4, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_PCC)
+    D(0xb92d, KMCTR, RRF_b, MSA4, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KMCTR)
+    D(0xb92e, KM, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KM)
+    D(0xb92f, KMC, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KMC)
+    D(0xb929, KMA, RRF_b, MSA8, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KMA)
+    D(0xb93c, PPNO, RRE, MSA5, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_PPNO)
+    D(0xb93e, KIMD, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KIMD)
+    D(0xb93f, KLMD, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KLMD)
+
+/* === Vector Support Instructions === */
+
+/* VECTOR BIT PERMUTE */
+    E(0xe785, VBPERM, VRR_c, VE, 0, 0, 0, 0, vbperm, 0, 0, IF_VEC)
+/* VECTOR GATHER ELEMENT */
+    E(0xe713, VGEF, VRV, V, la2, 0, 0, 0, vge, 0, ES_32, IF_VEC)
+    E(0xe712, VGEG, VRV, V, la2, 0, 0, 0, vge, 0, ES_64, IF_VEC)
+/* VECTOR GENERATE BYTE MASK */
+    F(0xe744, VGBM, VRI_a, V, 0, 0, 0, 0, vgbm, 0, IF_VEC)
+/* VECTOR GENERATE MASK */
+    F(0xe746, VGM, VRI_b, V, 0, 0, 0, 0, vgm, 0, IF_VEC)
+/* VECTOR LOAD */
+    F(0xe706, VL, VRX, V, la2, 0, 0, 0, vl, 0, IF_VEC)
+    F(0xe756, VLR, VRR_a, V, 0, 0, 0, 0, vlr, 0, IF_VEC)
+/* VECTOR LOAD AND REPLICATE */
+    F(0xe705, VLREP, VRX, V, la2, 0, 0, 0, vlrep, 0, IF_VEC)
+/* VECTOR LOAD ELEMENT */
+    E(0xe700, VLEB, VRX, V, la2, 0, 0, 0, vle, 0, ES_8, IF_VEC)
+    E(0xe701, VLEH, VRX, V, la2, 0, 0, 0, vle, 0, ES_16, IF_VEC)
+    E(0xe703, VLEF, VRX, V, la2, 0, 0, 0, vle, 0, ES_32, IF_VEC)
+    E(0xe702, VLEG, VRX, V, la2, 0, 0, 0, vle, 0, ES_64, IF_VEC)
+/* VECTOR LOAD ELEMENT IMMEDIATE */
+    E(0xe740, VLEIB, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_8, IF_VEC)
+    E(0xe741, VLEIH, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_16, IF_VEC)
+    E(0xe743, VLEIF, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_32, IF_VEC)
+    E(0xe742, VLEIG, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_64, IF_VEC)
+/* VECTOR LOAD GR FROM VR ELEMENT */
+    F(0xe721, VLGV, VRS_c, V, la2, 0, r1, 0, vlgv, 0, IF_VEC)
+/* VECTOR LOAD LOGICAL ELEMENT AND ZERO */
+    F(0xe704, VLLEZ, VRX, V, la2, 0, 0, 0, vllez, 0, IF_VEC)
+/* VECTOR LOAD MULTIPLE */
+    F(0xe736, VLM, VRS_a, V, la2, 0, 0, 0, vlm, 0, IF_VEC)
+/* VECTOR LOAD TO BLOCK BOUNDARY */
+    F(0xe707, VLBB, VRX, V, la2, 0, 0, 0, vlbb, 0, IF_VEC)
+/* VECTOR LOAD VR ELEMENT FROM GR */
+    F(0xe722, VLVG, VRS_b, V, la2, r3, 0, 0, vlvg, 0, IF_VEC)
+/* VECTOR LOAD VR FROM GRS DISJOINT */
+    F(0xe762, VLVGP, VRR_f, V, r2, r3, 0, 0, vlvgp, 0, IF_VEC)
+/* VECTOR LOAD WITH LENGTH */
+    F(0xe737, VLL, VRS_b, V, la2, r3_32u, 0, 0, vll, 0, IF_VEC)
+/* VECTOR MERGE HIGH */
+    F(0xe761, VMRH, VRR_c, V, 0, 0, 0, 0, vmr, 0, IF_VEC)
+/* VECTOR MERGE LOW */
+    F(0xe760, VMRL, VRR_c, V, 0, 0, 0, 0, vmr, 0, IF_VEC)
+/* VECTOR PACK */
+    F(0xe794, VPK, VRR_c, V, 0, 0, 0, 0, vpk, 0, IF_VEC)
+/* VECTOR PACK SATURATE */
+    F(0xe797, VPKS, VRR_b, V, 0, 0, 0, 0, vpk, 0, IF_VEC)
+/* VECTOR PACK LOGICAL SATURATE */
+    F(0xe795, VPKLS, VRR_b, V, 0, 0, 0, 0, vpk, 0, IF_VEC)
+/* VECTOR PERMUTE */
+    F(0xe78c, VPERM, VRR_e, V, 0, 0, 0, 0, vperm, 0, IF_VEC)
+/* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */
+    F(0xe784, VPDI, VRR_c, V, 0, 0, 0, 0, vpdi, 0, IF_VEC)
+/* VECTOR REPLICATE */
+    F(0xe74d, VREP, VRI_c, V, 0, 0, 0, 0, vrep, 0, IF_VEC)
+/* VECTOR REPLICATE IMMEDIATE */
+    F(0xe745, VREPI, VRI_a, V, 0, 0, 0, 0, vrepi, 0, IF_VEC)
+/* VECTOR SCATTER ELEMENT */
+    E(0xe71b, VSCEF, VRV, V, la2, 0, 0, 0, vsce, 0, ES_32, IF_VEC)
+    E(0xe71a, VSCEG, VRV, V, la2, 0, 0, 0, vsce, 0, ES_64, IF_VEC)
+/* VECTOR SELECT */
+    F(0xe78d, VSEL, VRR_e, V, 0, 0, 0, 0, vsel, 0, IF_VEC)
+/* VECTOR SIGN EXTEND TO DOUBLEWORD */
+    F(0xe75f, VSEG, VRR_a, V, 0, 0, 0, 0, vseg, 0, IF_VEC)
+/* VECTOR STORE */
+    F(0xe70e, VST, VRX, V, la2, 0, 0, 0, vst, 0, IF_VEC)
+/* VECTOR STORE ELEMENT */
+    E(0xe708, VSTEB, VRX, V, la2, 0, 0, 0, vste, 0, ES_8, IF_VEC)
+    E(0xe709, VSTEH, VRX, V, la2, 0, 0, 0, vste, 0, ES_16, IF_VEC)
+    E(0xe70b, VSTEF, VRX, V, la2, 0, 0, 0, vste, 0, ES_32, IF_VEC)
+    E(0xe70a, VSTEG, VRX, V, la2, 0, 0, 0, vste, 0, ES_64, IF_VEC)
+/* VECTOR STORE MULTIPLE */
+    F(0xe73e, VSTM, VRS_a, V, la2, 0, 0, 0, vstm, 0, IF_VEC)
+/* VECTOR STORE WITH LENGTH */
+    F(0xe73f, VSTL, VRS_b, V, la2, r3_32u, 0, 0, vstl, 0, IF_VEC)
+/* VECTOR UNPACK HIGH */
+    F(0xe7d7, VUPH, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)
+/* VECTOR UNPACK LOGICAL HIGH */
+    F(0xe7d5, VUPLH, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)
+/* VECTOR UNPACK LOW */
+    F(0xe7d6, VUPL, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)
+/* VECTOR UNPACK LOGICAL LOW */
+    F(0xe7d4, VUPLL, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)
+
+/* === Vector Integer Instructions === */
+
+/* VECTOR ADD */
+    F(0xe7f3, VA, VRR_c, V, 0, 0, 0, 0, va, 0, IF_VEC)
+/* VECTOR ADD COMPUTE CARRY */
+    F(0xe7f1, VACC, VRR_c, V, 0, 0, 0, 0, vacc, 0, IF_VEC)
+/* VECTOR ADD WITH CARRY */
+    F(0xe7bb, VAC, VRR_d, V, 0, 0, 0, 0, vac, 0, IF_VEC)
+/* VECTOR ADD WITH CARRY COMPUTE CARRY */
+    F(0xe7b9, VACCC, VRR_d, V, 0, 0, 0, 0, vaccc, 0, IF_VEC)
+/* VECTOR AND */
+    F(0xe768, VN, VRR_c, V, 0, 0, 0, 0, vn, 0, IF_VEC)
+/* VECTOR AND WITH COMPLEMENT */
+    F(0xe769, VNC, VRR_c, V, 0, 0, 0, 0, vnc, 0, IF_VEC)
+/* VECTOR AVERAGE */
+    F(0xe7f2, VAVG, VRR_c, V, 0, 0, 0, 0, vavg, 0, IF_VEC)
+/* VECTOR AVERAGE LOGICAL */
+    F(0xe7f0, VAVGL, VRR_c, V, 0, 0, 0, 0, vavgl, 0, IF_VEC)
+/* VECTOR CHECKSUM */
+    F(0xe766, VCKSM, VRR_c, V, 0, 0, 0, 0, vcksm, 0, IF_VEC)
+/* VECTOR ELEMENT COMPARE */
+    F(0xe7db, VEC, VRR_a, V, 0, 0, 0, 0, vec, cmps64, IF_VEC)
+/* VECTOR ELEMENT COMPARE LOGICAL */
+    F(0xe7d9, VECL, VRR_a, V, 0, 0, 0, 0, vec, cmpu64, IF_VEC)
+/* VECTOR COMPARE EQUAL */
+    E(0xe7f8, VCEQ, VRR_b, V, 0, 0, 0, 0, vc, 0, TCG_COND_EQ, IF_VEC)
+/* VECTOR COMPARE HIGH */
+    E(0xe7fb, VCH, VRR_b, V, 0, 0, 0, 0, vc, 0, TCG_COND_GT, IF_VEC)
+/* VECTOR COMPARE HIGH LOGICAL */
+    E(0xe7f9, VCHL, VRR_b, V, 0, 0, 0, 0, vc, 0, TCG_COND_GTU, IF_VEC)
+/* VECTOR COUNT LEADING ZEROS */
+    F(0xe753, VCLZ, VRR_a, V, 0, 0, 0, 0, vclz, 0, IF_VEC)
+/* VECTOR COUNT TRAILING ZEROS */
+    F(0xe752, VCTZ, VRR_a, V, 0, 0, 0, 0, vctz, 0, IF_VEC)
+/* VECTOR EXCLUSIVE OR */
+    F(0xe76d, VX, VRR_c, V, 0, 0, 0, 0, vx, 0, IF_VEC)
+/* VECTOR GALOIS FIELD MULTIPLY SUM */
+    F(0xe7b4, VGFM, VRR_c, V, 0, 0, 0, 0, vgfm, 0, IF_VEC)
+/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
+    F(0xe7bc, VGFMA, VRR_d, V, 0, 0, 0, 0, vgfma, 0, IF_VEC)
+/* VECTOR LOAD COMPLEMENT */
+    F(0xe7de, VLC, VRR_a, V, 0, 0, 0, 0, vlc, 0, IF_VEC)
+ F(0xe7df, VLP, VRR_a, V, 0, 0, 0, 0, vlp, 0, IF_VEC) +/* VECTOR MAXIMUM */ + F(0xe7ff, VMX, VRR_c, V, 0, 0, 0, 0, vmx, 0, IF_VEC) +/* VECTOR MAXIMUM LOGICAL */ + F(0xe7fd, VMXL, VRR_c, V, 0, 0, 0, 0, vmx, 0, IF_VEC) +/* VECTOR MINIMUM */ + F(0xe7fe, VMN, VRR_c, V, 0, 0, 0, 0, vmx, 0, IF_VEC) +/* VECTOR MINIMUM LOGICAL */ + F(0xe7fc, VMNL, VRR_c, V, 0, 0, 0, 0, vmx, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD LOW */ + F(0xe7aa, VMAL, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD HIGH */ + F(0xe7ab, VMAH, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD LOGICAL HIGH */ + F(0xe7a9, VMALH, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD EVEN */ + F(0xe7ae, VMAE, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD LOGICAL EVEN */ + F(0xe7ac, VMALE, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD ODD */ + F(0xe7af, VMAO, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD LOGICAL ODD */ + F(0xe7ad, VMALO, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY HIGH */ + F(0xe7a3, VMH, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY LOGICAL HIGH */ + F(0xe7a1, VMLH, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY LOW */ + F(0xe7a2, VML, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY EVEN */ + F(0xe7a6, VME, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY LOGICAL EVEN */ + F(0xe7a4, VMLE, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY ODD */ + F(0xe7a7, VMO, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY LOGICAL ODD */ + F(0xe7a5, VMLO, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY SUM LOGICAL */ + F(0xe7b8, VMSL, VRR_d, VE, 0, 0, 0, 0, vmsl, 0, IF_VEC) +/* VECTOR NAND */ + F(0xe76e, VNN, VRR_c, VE, 0, 0, 0, 0, vnn, 0, IF_VEC) +/* VECTOR NOR */ + F(0xe76b, VNO, VRR_c, V, 0, 0, 0, 0, vno, 0, IF_VEC) +/* VECTOR NOT EXCLUSIVE OR */ + F(0xe76c, VNX, VRR_c, VE, 0, 0, 0, 0, vnx, 0, IF_VEC) +/* VECTOR OR */ + F(0xe76a, VO, VRR_c, V, 0, 0, 0, 0, vo, 0, IF_VEC) +/* VECTOR OR WITH COMPLEMENT */ + F(0xe76f, VOC, VRR_c, VE, 0, 0, 0, 0, voc, 0, IF_VEC) +/* VECTOR POPULATION COUNT */ + F(0xe750, VPOPCT, VRR_a, V, 0, 0, 0, 0, vpopct, 0, IF_VEC) +/* VECTOR ELEMENT ROTATE LEFT LOGICAL */ + F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC) + F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC) +/* VECTOR ELEMENT ROTATE AND INSERT UNDER MASK */ + F(0xe772, VERIM, VRI_d, V, 0, 0, 0, 0, verim, 0, IF_VEC) +/* VECTOR ELEMENT SHIFT LEFT */ + F(0xe770, VESLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC) + F(0xe730, VESL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC) +/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */ + F(0xe77a, VESRAV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC) + F(0xe73a, VESRA, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC) +/* VECTOR ELEMENT SHIFT RIGHT LOGICAL */ + F(0xe778, VESRLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC) + F(0xe738, VESRL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC) +/* VECTOR SHIFT LEFT */ + F(0xe774, VSL, VRR_c, V, 0, 0, 0, 0, vsl, 0, IF_VEC) +/* VECTOR SHIFT LEFT BY BYTE */ + F(0xe775, VSLB, VRR_c, V, 0, 0, 0, 0, vsl, 0, IF_VEC) +/* VECTOR SHIFT LEFT DOUBLE BY BYTE */ + F(0xe777, VSLDB, VRI_d, V, 0, 0, 0, 0, vsldb, 0, IF_VEC) +/* VECTOR SHIFT RIGHT ARITHMETIC */ + F(0xe77e, VSRA, VRR_c, V, 0, 0, 0, 0, vsra, 0, IF_VEC) +/* VECTOR SHIFT RIGHT ARITHMETIC BY BYTE */ + F(0xe77f, VSRAB, VRR_c, V, 0, 0, 0, 0, vsra, 0, IF_VEC) +/* VECTOR SHIFT RIGHT LOGICAL */ + F(0xe77c, VSRL, VRR_c, V, 0, 0, 0, 0, vsrl, 0, IF_VEC) +/* VECTOR SHIFT RIGHT LOGICAL 
BY BYTE */ + F(0xe77d, VSRLB, VRR_c, V, 0, 0, 0, 0, vsrl, 0, IF_VEC) +/* VECTOR SUBTRACT */ + F(0xe7f7, VS, VRR_c, V, 0, 0, 0, 0, vs, 0, IF_VEC) +/* VECTOR SUBTRACT COMPUTE BORROW INDICATION */ + F(0xe7f5, VSCBI, VRR_c, V, 0, 0, 0, 0, vscbi, 0, IF_VEC) +/* VECTOR SUBTRACT WITH BORROW INDICATION */ + F(0xe7bf, VSBI, VRR_d, V, 0, 0, 0, 0, vsbi, 0, IF_VEC) +/* VECTOR SUBTRACT WITH BORROW COMPUTE BORROW INDICATION */ + F(0xe7bd, VSBCBI, VRR_d, V, 0, 0, 0, 0, vsbcbi, 0, IF_VEC) +/* VECTOR SUM ACROSS DOUBLEWORD */ + F(0xe765, VSUMG, VRR_c, V, 0, 0, 0, 0, vsumg, 0, IF_VEC) +/* VECTOR SUM ACROSS QUADWORD */ + F(0xe767, VSUMQ, VRR_c, V, 0, 0, 0, 0, vsumq, 0, IF_VEC) +/* VECTOR SUM ACROSS WORD */ + F(0xe764, VSUM, VRR_c, V, 0, 0, 0, 0, vsum, 0, IF_VEC) +/* VECTOR TEST UNDER MASK */ + F(0xe7d8, VTM, VRR_a, V, 0, 0, 0, 0, vtm, 0, IF_VEC) + +/* === Vector String Instructions === */ + +/* VECTOR FIND ANY ELEMENT EQUAL */ + F(0xe782, VFAE, VRR_b, V, 0, 0, 0, 0, vfae, 0, IF_VEC) +/* VECTOR FIND ELEMENT EQUAL */ + F(0xe780, VFEE, VRR_b, V, 0, 0, 0, 0, vfee, 0, IF_VEC) +/* VECTOR FIND ELEMENT NOT EQUAL */ + F(0xe781, VFENE, VRR_b, V, 0, 0, 0, 0, vfene, 0, IF_VEC) +/* VECTOR ISOLATE STRING */ + F(0xe75c, VISTR, VRR_a, V, 0, 0, 0, 0, vistr, 0, IF_VEC) +/* VECTOR STRING RANGE COMPARE */ + F(0xe78a, VSTRC, VRR_d, V, 0, 0, 0, 0, vstrc, 0, IF_VEC) + +/* === Vector Floating-Point Instructions === */ + +/* VECTOR FP ADD */ + F(0xe7e3, VFA, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC) +/* VECTOR FP COMPARE SCALAR */ + F(0xe7cb, WFC, VRR_a, V, 0, 0, 0, 0, wfc, 0, IF_VEC) +/* VECTOR FP COMPARE AND SIGNAL SCALAR */ + F(0xe7ca, WFK, VRR_a, V, 0, 0, 0, 0, wfc, 0, IF_VEC) +/* VECTOR FP COMPARE EQUAL */ + F(0xe7e8, VFCE, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC) +/* VECTOR FP COMPARE HIGH */ + F(0xe7eb, VFCH, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC) +/* VECTOR FP COMPARE HIGH OR EQUAL */ + F(0xe7ea, VFCHE, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC) +/* VECTOR FP CONVERT FROM FIXED 64-BIT */ + F(0xe7c3, VCDG, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP CONVERT FROM LOGICAL 64-BIT */ + F(0xe7c1, VCDLG, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP CONVERT TO FIXED 64-BIT */ + F(0xe7c2, VCGD, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP CONVERT TO LOGICAL 64-BIT */ + F(0xe7c0, VCLGD, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP DIVIDE */ + F(0xe7e5, VFD, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC) +/* VECTOR LOAD FP INTEGER */ + F(0xe7c7, VFI, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP LOAD LENGTHENED */ + F(0xe7c4, VFLL, VRR_a, V, 0, 0, 0, 0, vfll, 0, IF_VEC) +/* VECTOR FP LOAD ROUNDED */ + F(0xe7c5, VFLR, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP MAXIMUM */ + F(0xe7ef, VFMAX, VRR_c, VE, 0, 0, 0, 0, vfmax, 0, IF_VEC) +/* VECTOR FP MINIMUM */ + F(0xe7ee, VFMIN, VRR_c, VE, 0, 0, 0, 0, vfmax, 0, IF_VEC) +/* VECTOR FP MULTIPLY */ + F(0xe7e7, VFM, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC) +/* VECTOR FP MULTIPLY AND ADD */ + F(0xe78f, VFMA, VRR_e, V, 0, 0, 0, 0, vfma, 0, IF_VEC) +/* VECTOR FP MULTIPLY AND SUBTRACT */ + F(0xe78e, VFMS, VRR_e, V, 0, 0, 0, 0, vfma, 0, IF_VEC) +/* VECTOR FP NEGATIVE MULTIPLY AND ADD */ + F(0xe79f, VFNMA, VRR_e, VE, 0, 0, 0, 0, vfma, 0, IF_VEC) +/* VECTOR FP NEGATIVE MULTIPLY AND SUBTRACT */ + F(0xe79e, VFNMS, VRR_e, VE, 0, 0, 0, 0, vfma, 0, IF_VEC) +/* VECTOR FP PERFORM SIGN OPERATION */ + F(0xe7cc, VFPSO, VRR_a, V, 0, 0, 0, 0, vfpso, 0, IF_VEC) +/* VECTOR FP SQUARE ROOT */ + F(0xe7ce, VFSQ, VRR_a, V, 0, 0, 0, 0, vfsq, 0, IF_VEC) +/* VECTOR FP SUBTRACT */ + F(0xe7e2, VFS, VRR_c, V, 
0, 0, 0, 0, vfa, 0, IF_VEC) +/* VECTOR FP TEST DATA CLASS IMMEDIATE */ + F(0xe74a, VFTCI, VRI_e, V, 0, 0, 0, 0, vftci, 0, IF_VEC) + +#ifndef CONFIG_USER_ONLY +/* COMPARE AND SWAP AND PURGE */ + E(0xb250, CSP, RRE, Z, r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV) + E(0xb98a, CSPG, RRE, DAT_ENH, r1_o, ra2, r1_P, 0, csp, 0, MO_TEQ, IF_PRIV) +/* DIAGNOSE (KVM hypercall) */ + F(0x8300, DIAG, RSI, Z, 0, 0, 0, 0, diag, 0, IF_PRIV | IF_IO) +/* INSERT STORAGE KEY EXTENDED */ + F(0xb229, ISKE, RRE, Z, 0, r2_o, new, r1_8, iske, 0, IF_PRIV) +/* INVALIDATE DAT TABLE ENTRY */ + F(0xb98e, IDTE, RRF_b, Z, r1_o, r2_o, 0, 0, idte, 0, IF_PRIV) +/* INVALIDATE PAGE TABLE ENTRY */ + F(0xb221, IPTE, RRF_a, Z, r1_o, r2_o, 0, 0, ipte, 0, IF_PRIV) +/* LOAD CONTROL */ + F(0xb700, LCTL, RS_a, Z, 0, a2, 0, 0, lctl, 0, IF_PRIV) + F(0xeb2f, LCTLG, RSY_a, Z, 0, a2, 0, 0, lctlg, 0, IF_PRIV) +/* LOAD PROGRAM PARAMETER */ + F(0xb280, LPP, S, LPP, 0, m2_64, 0, 0, lpp, 0, IF_PRIV) +/* LOAD PSW */ + F(0x8200, LPSW, S, Z, 0, a2, 0, 0, lpsw, 0, IF_PRIV) +/* LOAD PSW EXTENDED */ + F(0xb2b2, LPSWE, S, Z, 0, a2, 0, 0, lpswe, 0, IF_PRIV) +/* LOAD REAL ADDRESS */ + F(0xb100, LRA, RX_a, Z, 0, a2, r1, 0, lra, 0, IF_PRIV) + F(0xe313, LRAY, RXY_a, LD, 0, a2, r1, 0, lra, 0, IF_PRIV) + F(0xe303, LRAG, RXY_a, Z, 0, a2, r1, 0, lra, 0, IF_PRIV) +/* LOAD USING REAL ADDRESS */ + E(0xb24b, LURA, RRE, Z, 0, ra2, new, r1_32, lura, 0, MO_TEUL, IF_PRIV) + E(0xb905, LURAG, RRE, Z, 0, ra2, r1, 0, lura, 0, MO_TEQ, IF_PRIV) +/* MOVE TO PRIMARY */ + F(0xda00, MVCP, SS_d, Z, la1, a2, 0, 0, mvcp, 0, IF_PRIV) +/* MOVE TO SECONDARY */ + F(0xdb00, MVCS, SS_d, Z, la1, a2, 0, 0, mvcs, 0, IF_PRIV) +/* PURGE TLB */ + F(0xb20d, PTLB, S, Z, 0, 0, 0, 0, ptlb, 0, IF_PRIV) +/* RESET REFERENCE BIT EXTENDED */ + F(0xb22a, RRBE, RRE, Z, 0, r2_o, 0, 0, rrbe, 0, IF_PRIV) +/* SERVICE CALL LOGICAL PROCESSOR (PV hypercall) */ + F(0xb220, SERVC, RRE, Z, r1_o, r2_o, 0, 0, servc, 0, IF_PRIV | IF_IO) +/* SET ADDRESS SPACE CONTROL FAST */ + F(0xb279, SACF, S, Z, 0, a2, 0, 0, sacf, 0, IF_PRIV) +/* SET CLOCK */ + F(0xb204, SCK, S, Z, la2, 0, 0, 0, sck, 0, IF_PRIV | IF_IO) +/* SET CLOCK COMPARATOR */ + F(0xb206, SCKC, S, Z, 0, m2_64a, 0, 0, sckc, 0, IF_PRIV | IF_IO) +/* SET CLOCK PROGRAMMABLE FIELD */ + F(0x0107, SCKPF, E, Z, 0, 0, 0, 0, sckpf, 0, IF_PRIV) +/* SET CPU TIMER */ + F(0xb208, SPT, S, Z, 0, m2_64a, 0, 0, spt, 0, IF_PRIV | IF_IO) +/* SET PREFIX */ + F(0xb210, SPX, S, Z, 0, m2_32ua, 0, 0, spx, 0, IF_PRIV) +/* SET PSW KEY FROM ADDRESS */ + F(0xb20a, SPKA, S, Z, 0, a2, 0, 0, spka, 0, IF_PRIV) +/* SET STORAGE KEY EXTENDED */ + F(0xb22b, SSKE, RRF_c, Z, r1_o, r2_o, 0, 0, sske, 0, IF_PRIV) +/* SET SYSTEM MASK */ + F(0x8000, SSM, S, Z, 0, m2_8u, 0, 0, ssm, 0, IF_PRIV) +/* SIGNAL PROCESSOR */ + F(0xae00, SIGP, RS_a, Z, 0, a2, 0, 0, sigp, 0, IF_PRIV | IF_IO) +/* STORE CLOCK COMPARATOR */ + F(0xb207, STCKC, S, Z, la2, 0, new, m1_64a, stckc, 0, IF_PRIV) +/* STORE CONTROL */ + F(0xb600, STCTL, RS_a, Z, 0, a2, 0, 0, stctl, 0, IF_PRIV) + F(0xeb25, STCTG, RSY_a, Z, 0, a2, 0, 0, stctg, 0, IF_PRIV) +/* STORE CPU ADDRESS */ + F(0xb212, STAP, S, Z, la2, 0, new, m1_16a, stap, 0, IF_PRIV) +/* STORE CPU ID */ + F(0xb202, STIDP, S, Z, la2, 0, new, m1_64a, stidp, 0, IF_PRIV) +/* STORE CPU TIMER */ + F(0xb209, STPT, S, Z, la2, 0, new, m1_64a, stpt, 0, IF_PRIV | IF_IO) +/* STORE FACILITY LIST */ + F(0xb2b1, STFL, S, Z, 0, 0, 0, 0, stfl, 0, IF_PRIV) +/* STORE PREFIX */ + F(0xb211, STPX, S, Z, la2, 0, new, m1_32a, stpx, 0, IF_PRIV) +/* STORE SYSTEM INFORMATION */ + F(0xb27d, STSI, S, Z, 0, a2, 0, 
0, stsi, 0, IF_PRIV) +/* STORE THEN AND SYSTEM MASK */ + F(0xac00, STNSM, SI, Z, la1, 0, 0, 0, stnosm, 0, IF_PRIV) +/* STORE THEN OR SYSTEM MASK */ + F(0xad00, STOSM, SI, Z, la1, 0, 0, 0, stnosm, 0, IF_PRIV) +/* STORE USING REAL ADDRESS */ + E(0xb246, STURA, RRE, Z, r1_o, ra2, 0, 0, stura, 0, MO_TEUL, IF_PRIV) + E(0xb925, STURG, RRE, Z, r1_o, ra2, 0, 0, stura, 0, MO_TEQ, IF_PRIV) +/* TEST BLOCK */ + F(0xb22c, TB, RRE, Z, 0, r2_o, 0, 0, testblock, 0, IF_PRIV) +/* TEST PROTECTION */ + C(0xe501, TPROT, SSE, Z, la1, a2, 0, 0, tprot, 0) + +/* CCW I/O Instructions */ + F(0xb276, XSCH, S, Z, 0, 0, 0, 0, xsch, 0, IF_PRIV | IF_IO) + F(0xb230, CSCH, S, Z, 0, 0, 0, 0, csch, 0, IF_PRIV | IF_IO) + F(0xb231, HSCH, S, Z, 0, 0, 0, 0, hsch, 0, IF_PRIV | IF_IO) + F(0xb232, MSCH, S, Z, 0, insn, 0, 0, msch, 0, IF_PRIV | IF_IO) + F(0xb23b, RCHP, S, Z, 0, 0, 0, 0, rchp, 0, IF_PRIV | IF_IO) + F(0xb238, RSCH, S, Z, 0, 0, 0, 0, rsch, 0, IF_PRIV | IF_IO) + F(0xb237, SAL, S, Z, 0, 0, 0, 0, sal, 0, IF_PRIV | IF_IO) + F(0xb23c, SCHM, S, Z, 0, insn, 0, 0, schm, 0, IF_PRIV | IF_IO) + F(0xb274, SIGA, S, Z, 0, 0, 0, 0, siga, 0, IF_PRIV | IF_IO) + F(0xb23a, STCPS, S, Z, 0, 0, 0, 0, stcps, 0, IF_PRIV | IF_IO) + F(0xb233, SSCH, S, Z, 0, insn, 0, 0, ssch, 0, IF_PRIV | IF_IO) + F(0xb239, STCRW, S, Z, 0, insn, 0, 0, stcrw, 0, IF_PRIV | IF_IO) + F(0xb234, STSCH, S, Z, 0, insn, 0, 0, stsch, 0, IF_PRIV | IF_IO) + F(0xb236, TPI , S, Z, la2, 0, 0, 0, tpi, 0, IF_PRIV | IF_IO) + F(0xb235, TSCH, S, Z, 0, insn, 0, 0, tsch, 0, IF_PRIV | IF_IO) + /* ??? Not listed in PoO ninth edition, but there's a linux driver that + uses it: "A CHSC subchannel is usually present on LPAR only." */ + F(0xb25f, CHSC, RRE, Z, 0, insn, 0, 0, chsc, 0, IF_PRIV | IF_IO) + +/* zPCI Instructions */ + /* None of these instructions are documented in the PoP, so this is all + based upon target/s390x/kvm.c and Linux code and likely incomplete */ + F(0xebd0, PCISTB, RSY_a, PCI, la2, 0, 0, 0, pcistb, 0, IF_PRIV | IF_IO) + F(0xebd1, SIC, RSY_a, AIS, r1, r3, 0, 0, sic, 0, IF_PRIV | IF_IO) + F(0xb9a0, CLP, RRF_c, PCI, 0, 0, 0, 0, clp, 0, IF_PRIV | IF_IO) + F(0xb9d0, PCISTG, RRE, PCI, 0, 0, 0, 0, pcistg, 0, IF_PRIV | IF_IO) + F(0xb9d2, PCILG, RRE, PCI, 0, 0, 0, 0, pcilg, 0, IF_PRIV | IF_IO) + F(0xb9d3, RPCIT, RRE, PCI, 0, 0, 0, 0, rpcit, 0, IF_PRIV | IF_IO) + F(0xe3d0, MPCIFC, RXY_a, PCI, la2, 0, 0, 0, mpcifc, 0, IF_PRIV | IF_IO) + F(0xe3d4, STPCIFC, RXY_a, PCI, la2, 0, 0, 0, stpcifc, 0, IF_PRIV | IF_IO) + +#endif /* CONFIG_USER_ONLY */ diff --git a/target/s390x/tcg/insn-format.def b/target/s390x/tcg/insn-format.def new file mode 100644 index 0000000000..6253edbd19 --- /dev/null +++ b/target/s390x/tcg/insn-format.def @@ -0,0 +1,81 @@ +/* Description of s390 insn formats. */ +/* NAME F1, F2... */ +F0(E) +F1(I, I(1, 8, 8)) +F2(RI_a, R(1, 8), I(2,16,16)) +F2(RI_b, R(1, 8), I(2,16,16)) +F2(RI_c, M(1, 8), I(2,16,16)) +F3(RIE_a, R(1, 8), I(2,16,16), M(3,32)) +F4(RIE_b, R(1, 8), R(2,12), M(3,32), I(4,16,16)) +F4(RIE_c, R(1, 8), I(2,32, 8), M(3,12), I(4,16,16)) +F3(RIE_d, R(1, 8), I(2,16,16), R(3,12)) +F3(RIE_e, R(1, 8), I(2,16,16), R(3,12)) +F5(RIE_f, R(1, 8), R(2,12), I(3,16,8), I(4,24,8), I(5,32,8)) +F3(RIE_g, R(1, 8), I(2,16,16), M(3,12)) +F2(RIL_a, R(1, 8), I(2,16,32)) +F2(RIL_b, R(1, 8), I(2,16,32)) +F2(RIL_c, M(1, 8), I(2,16,32)) +F4(RIS, R(1, 8), I(2,32, 8), M(3,12), BD(4,16,20)) +/* ??? The PoO does not call out subtypes _a and _b for RR, as it does + for e.g. RX. Our checking requires this for e.g. BCR. 
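+   For instance (an illustrative note, not from the PoO): LR interprets
+   bits 8-11 as register R1 and therefore decodes as RR_a below, while
+   BCR interprets the same bits as the 4-bit mask M1 and decodes as RR_b.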
*/ +F2(RR_a, R(1, 8), R(2,12)) +F2(RR_b, M(1, 8), R(2,12)) +F2(RRE, R(1,24), R(2,28)) +F3(RRD, R(1,16), R(2,28), R(3,24)) +F4(RRF_a, R(1,24), R(2,28), R(3,16), M(4,20)) +F4(RRF_b, R(1,24), R(2,28), R(3,16), M(4,20)) +F4(RRF_c, R(1,24), R(2,28), M(3,16), M(4,20)) +F4(RRF_d, R(1,24), R(2,28), M(3,16), M(4,20)) +F4(RRF_e, R(1,24), R(2,28), M(3,16), M(4,20)) +F4(RRS, R(1, 8), R(2,12), M(3,32), BD(4,16,20)) +F3(RS_a, R(1, 8), BD(2,16,20), R(3,12)) +F3(RS_b, R(1, 8), BD(2,16,20), M(3,12)) +F3(RSI, R(1, 8), I(2,16,16), R(3,12)) +F2(RSL, L(1, 8, 4), BD(1,16,20)) +F3(RSY_a, R(1, 8), BDL(2), R(3,12)) +F3(RSY_b, R(1, 8), BDL(2), M(3,12)) +F2(RX_a, R(1, 8), BXD(2)) +F2(RX_b, M(1, 8), BXD(2)) +F3(RXE, R(1, 8), BXD(2), M(3,32)) +F3(RXF, R(1,32), BXD(2), R(3, 8)) +F2(RXY_a, R(1, 8), BXDL(2)) +F2(RXY_b, M(1, 8), BXDL(2)) +F1(S, BD(2,16,20)) +F2(SI, BD(1,16,20), I(2,8,8)) +F2(SIL, BD(1,16,20), I(2,32,16)) +F2(SIY, BDL(1), I(2, 8, 8)) +F3(SS_a, L(1, 8, 8), BD(1,16,20), BD(2,32,36)) +F4(SS_b, L(1, 8, 4), BD(1,16,20), L(2,12,4), BD(2,32,36)) +F4(SS_c, L(1, 8, 4), BD(1,16,20), BD(2,32,36), I(3,12, 4)) +/* ??? Odd man out. The L1 field here is really a register, but the + easy way to compress the fields has R1 and B1 overlap. */ +F4(SS_d, L(1, 8, 4), BD(1,16,20), BD(2,32,36), R(3,12)) +F4(SS_e, R(1, 8), BD(2,16,20), R(3,12), BD(4,32,36)) +F3(SS_f, BD(1,16,20), L(2,8,8), BD(2,32,36)) +F2(SSE, BD(1,16,20), BD(2,32,36)) +F3(SSF, BD(1,16,20), BD(2,32,36), R(3,8)) +F3(VRI_a, V(1,8), I(2,16,16), M(3,32)) +F4(VRI_b, V(1,8), I(2,16,8), I(3,24,8), M(4,32)) +F4(VRI_c, V(1,8), V(3,12), I(2,16,16), M(4,32)) +F5(VRI_d, V(1,8), V(2,12), V(3,16), I(4,24,8), M(5,32)) +F5(VRI_e, V(1,8), V(2,12), I(3,16,12), M(5,28), M(4,32)) +F5(VRI_f, V(1,8), V(2,12), V(3,16), M(5,24), I(4,28,8)) +F5(VRI_g, V(1,8), V(2,12), I(4,16,8), M(5,24), I(3,28,8)) +F3(VRI_h, V(1,8), I(2,16,16), I(3,32,4)) +F4(VRI_i, V(1,8), R(2,12), M(4,24), I(3,28,8)) +F5(VRR_a, V(1,8), V(2,12), M(5,24), M(4,28), M(3,32)) +F5(VRR_b, V(1,8), V(2,12), V(3,16), M(5,24), M(4,32)) +F6(VRR_c, V(1,8), V(2,12), V(3,16), M(6,24), M(5,28), M(4,32)) +F6(VRR_d, V(1,8), V(2,12), V(3,16), M(5,20), M(6,24), V(4,32)) +F6(VRR_e, V(1,8), V(2,12), V(3,16), M(6,20), M(5,28), V(4,32)) +F3(VRR_f, V(1,8), R(2,12), R(3,16)) +F1(VRR_g, V(1,12)) +F3(VRR_h, V(1,12), V(2,16), M(3,24)) +F3(VRR_i, R(1,8), V(2,12), M(3,24)) +F4(VRS_a, V(1,8), V(3,12), BD(2,16,20), M(4,32)) +F4(VRS_b, V(1,8), R(3,12), BD(2,16,20), M(4,32)) +F4(VRS_c, R(1,8), V(3,12), BD(2,16,20), M(4,32)) +F3(VRS_d, R(3,12), BD(2,16,20), V(1,32)) +F4(VRV, V(1,8), V(2,12), BD(2,16,20), M(3,32)) +F3(VRX, V(1,8), BXD(2), M(3,32)) +F3(VSI, I(3,8,8), BD(2,16,20), V(1,32)) diff --git a/target/s390x/tcg/int_helper.c b/target/s390x/tcg/int_helper.c new file mode 100644 index 0000000000..954542388a --- /dev/null +++ b/target/s390x/tcg/int_helper.c @@ -0,0 +1,148 @@ +/* + * S/390 integer helper routines + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2009 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "tcg_s390x.h" +#include "exec/exec-all.h" +#include "qemu/host-utils.h" +#include "exec/helper-proto.h" + +/* #define DEBUG_HELPER */ +#ifdef DEBUG_HELPER +#define HELPER_LOG(x...) qemu_log(x) +#else +#define HELPER_LOG(x...) +#endif + +/* 64/32 -> 32 signed division */ +int64_t HELPER(divs32)(CPUS390XState *env, int64_t a, int64_t b64) +{ + int32_t ret, b = b64; + int64_t q; + + if (b == 0) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + + ret = q = a / b; + env->retxl = a % b; + + /* Catch non-representable quotient. */ + if (ret != q) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + + return ret; +} + +/* 64/32 -> 32 unsigned division */ +uint64_t HELPER(divu32)(CPUS390XState *env, uint64_t a, uint64_t b64) +{ + uint32_t ret, b = b64; + uint64_t q; + + if (b == 0) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + + ret = q = a / b; + env->retxl = a % b; + + /* Catch non-representable quotient. */ + if (ret != q) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + + return ret; +} + +/* 64/64 -> 64 signed division */ +int64_t HELPER(divs64)(CPUS390XState *env, int64_t a, int64_t b) +{ + /* Catch divide by zero, and non-representable quotient (MIN / -1). */ + if (b == 0 || (b == -1 && a == (1ll << 63))) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + env->retxl = a % b; + return a / b; +} + +/* 128 -> 64/64 unsigned division */ +uint64_t HELPER(divu64)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t b) +{ + uint64_t ret; + /* Signal divide by zero. */ + if (b == 0) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + if (ah == 0) { + /* 64 -> 64/64 case */ + env->retxl = al % b; + ret = al / b; + } else { + /* ??? Move i386 idivq helper to host-utils. */ +#ifdef CONFIG_INT128 + __uint128_t a = ((__uint128_t)ah << 64) | al; + __uint128_t q = a / b; + env->retxl = a % b; + ret = q; + if (ret != q) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } +#else + /* 32-bit hosts would need special wrapper functionality - just abort if + we encounter such a case; it's very unlikely anyways. */ + cpu_abort(env_cpu(env), "128 -> 64/64 division not implemented\n"); +#endif + } + return ret; +} + +uint64_t HELPER(cvd)(int32_t reg) +{ + /* positive 0 */ + uint64_t dec = 0x0c; + int64_t bin = reg; + int shift; + + if (bin < 0) { + bin = -bin; + dec = 0x0d; + } + + for (shift = 4; (shift < 64) && bin; shift += 4) { + dec |= (bin % 10) << shift; + bin /= 10; + } + + return dec; +} + +uint64_t HELPER(popcnt)(uint64_t val) +{ + /* Note that we don't fold past bytes. 
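+   As an illustrative walk-through (ours, not part of the original
+   comment), take the byte 0xb1 = 10110001:
+     pair fold:   10 11 00 01 -> 01 10 00 01 (0x61)
+     nibble fold: 0110 0001 -> 0011 0001 (0x31)
+     byte fold:   0x31 + 0x03 = 0x34, masked to 0x04, i.e. 4 bits set.
+   Each result byte thus holds the population count of the corresponding
+   input byte, which is exactly what POPCNT is defined to return.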
*/ + val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL); + val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL); + val = (val + (val >> 4)) & 0x0f0f0f0f0f0f0f0fULL; + return val; +} diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c new file mode 100644 index 0000000000..9bae13ecf0 --- /dev/null +++ b/target/s390x/tcg/mem_helper.c @@ -0,0 +1,3008 @@ +/* + * S/390 memory access helper routines + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2009 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "tcg_s390x.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "qemu/int128.h" +#include "qemu/atomic128.h" +#include "tcg/tcg.h" + +#if !defined(CONFIG_USER_ONLY) +#include "hw/s390x/storage-keys.h" +#include "hw/boards.h" +#endif + +/*****************************************************************************/ +/* Softmmu support */ + +/* #define DEBUG_HELPER */ +#ifdef DEBUG_HELPER +#define HELPER_LOG(x...) qemu_log(x) +#else +#define HELPER_LOG(x...) +#endif + +static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key) +{ + uint16_t pkm = env->cregs[3] >> 16; + + if (env->psw.mask & PSW_MASK_PSTATE) { + /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */ + return pkm & (0x80 >> psw_key); + } + return true; +} + +static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest, + uint64_t src, uint32_t len) +{ + if (!len || src == dest) { + return false; + } + /* Take care of wrapping at the end of address space. */ + if (unlikely(wrap_address(env, src + len - 1) < src)) { + return dest > src || dest <= wrap_address(env, src + len - 1); + } + return dest > src && dest <= src + len - 1; +} + +/* Trigger a SPECIFICATION exception if an address or a length is not + naturally aligned. */ +static inline void check_alignment(CPUS390XState *env, uint64_t v, + int wordsize, uintptr_t ra) +{ + if (v % wordsize) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } +} + +/* Load a value from memory according to its size. */ +static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr, + int wordsize, uintptr_t ra) +{ + switch (wordsize) { + case 1: + return cpu_ldub_data_ra(env, addr, ra); + case 2: + return cpu_lduw_data_ra(env, addr, ra); + default: + abort(); + } +} + +/* Store a value to memory according to its size. */ +static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr, + uint64_t value, int wordsize, + uintptr_t ra) +{ + switch (wordsize) { + case 1: + cpu_stb_data_ra(env, addr, value, ra); + break; + case 2: + cpu_stw_data_ra(env, addr, value, ra); + break; + default: + abort(); + } +} + +/* An access covers at most 4096 bytes and therefore at most two pages. 
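+   Sketch (assuming 4 KiB pages): a 300-byte access starting at 0x1ff0
+   is split into size1 = MIN(300, -(0x1ff0 | TARGET_PAGE_MASK)) = 16
+   bytes on the first page and size2 = 300 - 16 = 284 bytes on the
+   second, as computed by access_prepare_nf() below.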
*/ +typedef struct S390Access { + target_ulong vaddr1; + target_ulong vaddr2; + char *haddr1; + char *haddr2; + uint16_t size1; + uint16_t size2; + /* + * If we can't access the host page directly, we'll have to do I/O access + * via ld/st helpers. These are internal details, so we store the + * mmu idx to do the access here instead of passing it around in the + * helpers. Maybe, one day we can get rid of ld/st access - once we can + * handle TLB_NOTDIRTY differently. We don't expect these special accesses + * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP + * pages, we might trigger a new MMU translation - very unlikely that + * the mapping changes in between and we would trigger a fault. + */ + int mmu_idx; +} S390Access; + +/* + * With nonfault=1, return the PGM_ exception that would have been injected + * into the guest; return 0 if no exception was detected. + * + * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec. + * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr. + */ +static int s390_probe_access(CPUArchState *env, target_ulong addr, int size, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, uintptr_t ra) +{ + int flags; + +#if defined(CONFIG_USER_ONLY) + flags = page_get_flags(addr); + if (!(flags & (access_type == MMU_DATA_LOAD ? PAGE_READ : PAGE_WRITE_ORG))) { + env->__excp_addr = addr; + flags = (flags & PAGE_VALID) ? PGM_PROTECTION : PGM_ADDRESSING; + if (nonfault) { + return flags; + } + tcg_s390_program_interrupt(env, flags, ra); + } + *phost = g2h(env_cpu(env), addr); +#else + /* + * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL + * to detect if there was an exception during tlb_fill(). + */ + env->tlb_fill_exc = 0; + flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost, + ra); + if (env->tlb_fill_exc) { + return env->tlb_fill_exc; + } + + if (unlikely(flags & TLB_WATCHPOINT)) { + /* S390 does not presently use transaction attributes. */ + cpu_check_watchpoint(env_cpu(env), addr, size, + MEMTXATTRS_UNSPECIFIED, + (access_type == MMU_DATA_STORE + ? BP_MEM_WRITE : BP_MEM_READ), ra); + } +#endif + return 0; +} + +static int access_prepare_nf(S390Access *access, CPUS390XState *env, + bool nonfault, vaddr vaddr1, int size, + MMUAccessType access_type, + int mmu_idx, uintptr_t ra) +{ + void *haddr1, *haddr2 = NULL; + int size1, size2, exc; + vaddr vaddr2 = 0; + + assert(size > 0 && size <= 4096); + + size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)); + size2 = size - size1; + + exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault, + &haddr1, ra); + if (exc) { + return exc; + } + if (unlikely(size2)) { + /* The access crosses page boundaries. */ + vaddr2 = wrap_address(env, vaddr1 + size1); + exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx, + nonfault, &haddr2, ra); + if (exc) { + return exc; + } + } + + *access = (S390Access) { + .vaddr1 = vaddr1, + .vaddr2 = vaddr2, + .haddr1 = haddr1, + .haddr2 = haddr2, + .size1 = size1, + .size2 = size2, + .mmu_idx = mmu_idx + }; + return 0; +} + +static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size, + MMUAccessType access_type, int mmu_idx, + uintptr_t ra) +{ + S390Access ret; + int exc = access_prepare_nf(&ret, env, false, vaddr, size, + access_type, mmu_idx, ra); + assert(!exc); + return ret; +} + +/* Helper to handle memset on a single page. 
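+   (access_memset() below calls this once for each of the at most two
+   pages described by a prepared S390Access.)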
*/ +static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr, + uint8_t byte, uint16_t size, int mmu_idx, + uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + g_assert(haddr); + memset(haddr, byte, size); +#else + TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + int i; + + if (likely(haddr)) { + memset(haddr, byte, size); + } else { + /* + * Do a single access and test if we can then get access to the + * page. This is especially relevant to speed up TLB_NOTDIRTY. + */ + g_assert(size > 0); + helper_ret_stb_mmu(env, vaddr, byte, oi, ra); + haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); + if (likely(haddr)) { + memset(haddr + 1, byte, size - 1); + } else { + for (i = 1; i < size; i++) { + helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra); + } + } + } +#endif +} + +static void access_memset(CPUS390XState *env, S390Access *desta, + uint8_t byte, uintptr_t ra) +{ + + do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1, + desta->mmu_idx, ra); + if (likely(!desta->size2)) { + return; + } + do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2, + desta->mmu_idx, ra); +} + +static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr, + int offset, int mmu_idx, uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + return ldub_p(*haddr + offset); +#else + TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + uint8_t byte; + + if (likely(*haddr)) { + return ldub_p(*haddr + offset); + } + /* + * Do a single access and test if we can then get access to the + * page. This is especially relevant to speed up TLB_NOTDIRTY. + */ + byte = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra); + *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx); + return byte; +#endif +} + +static uint8_t access_get_byte(CPUS390XState *env, S390Access *access, + int offset, uintptr_t ra) +{ + if (offset < access->size1) { + return do_access_get_byte(env, access->vaddr1, &access->haddr1, + offset, access->mmu_idx, ra); + } + return do_access_get_byte(env, access->vaddr2, &access->haddr2, + offset - access->size1, access->mmu_idx, ra); +} + +static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr, + int offset, uint8_t byte, int mmu_idx, + uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + stb_p(*haddr + offset, byte); +#else + TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + + if (likely(*haddr)) { + stb_p(*haddr + offset, byte); + return; + } + /* + * Do a single access and test if we can then get access to the + * page. This is especially relevant to speed up TLB_NOTDIRTY. + */ + helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra); + *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); +#endif +} + +static void access_set_byte(CPUS390XState *env, S390Access *access, + int offset, uint8_t byte, uintptr_t ra) +{ + if (offset < access->size1) { + do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte, + access->mmu_idx, ra); + } else { + do_access_set_byte(env, access->vaddr2, &access->haddr2, + offset - access->size1, byte, access->mmu_idx, ra); + } +} + +/* + * Move data with the same semantics as memmove() in case ranges don't overlap + * or src > dest. Undefined behavior on destructive overlaps. 
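+ * For example, dest == src + 1 with len > 1 is destructive: MVC's
+ * byte-by-byte definition propagates the first byte through the whole
+ * buffer, which a plain memmove() would not do; do_helper_mvc() below
+ * therefore special-cases such overlaps via access_memset() or a byte
+ * loop.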
+ */ +static void access_memmove(CPUS390XState *env, S390Access *desta, + S390Access *srca, uintptr_t ra) +{ + int diff; + + g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2); + + /* Fallback to slow access in case we don't have access to all host pages */ + if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) || + !srca->haddr1 || (srca->size2 && !srca->haddr2))) { + int i; + + for (i = 0; i < desta->size1 + desta->size2; i++) { + uint8_t byte = access_get_byte(env, srca, i, ra); + + access_set_byte(env, desta, i, byte, ra); + } + return; + } + + if (srca->size1 == desta->size1) { + memmove(desta->haddr1, srca->haddr1, srca->size1); + if (unlikely(srca->size2)) { + memmove(desta->haddr2, srca->haddr2, srca->size2); + } + } else if (srca->size1 < desta->size1) { + diff = desta->size1 - srca->size1; + memmove(desta->haddr1, srca->haddr1, srca->size1); + memmove(desta->haddr1 + srca->size1, srca->haddr2, diff); + if (likely(desta->size2)) { + memmove(desta->haddr2, srca->haddr2 + diff, desta->size2); + } + } else { + diff = srca->size1 - desta->size1; + memmove(desta->haddr1, srca->haddr1, desta->size1); + memmove(desta->haddr2, srca->haddr1 + desta->size1, diff); + if (likely(srca->size2)) { + memmove(desta->haddr2 + diff, srca->haddr2, srca->size2); + } + } +} + +static int mmu_idx_from_as(uint8_t as) +{ + switch (as) { + case AS_PRIMARY: + return MMU_PRIMARY_IDX; + case AS_SECONDARY: + return MMU_SECONDARY_IDX; + case AS_HOME: + return MMU_HOME_IDX; + default: + /* FIXME AS_ACCREG */ + g_assert_not_reached(); + } +} + +/* and on array */ +static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest, + uint64_t src, uintptr_t ra) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; + uint32_t i; + uint8_t c = 0; + + HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", + __func__, l, dest, src); + + /* NC always processes one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = access_get_byte(env, &srca1, i, ra) & + access_get_byte(env, &srca2, i, ra); + + c |= x; + access_set_byte(env, &desta, i, x, ra); + } + return c != 0; +} + +uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest, + uint64_t src) +{ + return do_helper_nc(env, l, dest, src, GETPC()); +} + +/* xor on array */ +static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest, + uint64_t src, uintptr_t ra) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; + uint32_t i; + uint8_t c = 0; + + HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", + __func__, l, dest, src); + + /* XC always processes one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + + /* xor with itself is the same as memset(0) */ + if (src == dest) { + access_memset(env, &desta, 0, ra); + return 0; + } + + for (i = 0; i < l; i++) { + const uint8_t x = access_get_byte(env, &srca1, i, ra) ^ + access_get_byte(env, &srca2, i, ra); + + c |= x; + access_set_byte(env, &desta, i, x, ra); + } + return c != 0; +} + +uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, 
uint64_t dest, + uint64_t src) +{ + return do_helper_xc(env, l, dest, src, GETPC()); +} + +/* or on array */ +static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest, + uint64_t src, uintptr_t ra) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; + uint32_t i; + uint8_t c = 0; + + HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", + __func__, l, dest, src); + + /* OC always processes one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = access_get_byte(env, &srca1, i, ra) | + access_get_byte(env, &srca2, i, ra); + + c |= x; + access_set_byte(env, &desta, i, x, ra); + } + return c != 0; +} + +uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest, + uint64_t src) +{ + return do_helper_oc(env, l, dest, src, GETPC()); +} + +/* memmove */ +static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest, + uint64_t src, uintptr_t ra) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca, desta; + uint32_t i; + + HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", + __func__, l, dest, src); + + /* MVC always copies one more byte than specified - maximum is 256 */ + l++; + + srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + + /* + * "When the operands overlap, the result is obtained as if the operands + * were processed one byte at a time". Only non-destructive overlaps + * behave like memmove(). + */ + if (dest == src + 1) { + access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra); + } else if (!is_destructive_overlap(env, dest, src, l)) { + access_memmove(env, &desta, &srca, ra); + } else { + for (i = 0; i < l; i++) { + uint8_t byte = access_get_byte(env, &srca, i, ra); + + access_set_byte(env, &desta, i, byte, ra); + } + } + + return env->cc_op; +} + +void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) +{ + do_helper_mvc(env, l, dest, src, GETPC()); +} + +/* move inverse */ +void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca, desta; + uintptr_t ra = GETPC(); + int i; + + /* MVCIN always copies one more byte than specified - maximum is 256 */ + l++; + + src = wrap_address(env, src - l + 1); + srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra); + + access_set_byte(env, &desta, i, x, ra); + } +} + +/* move numerics */ +void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; + uintptr_t ra = GETPC(); + int i; + + /* MVN always copies one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) | + (access_get_byte(env, &srca2, i, ra) & 0xf0); + + 
access_set_byte(env, &desta, i, x, ra); + } +} + +/* move with offset */ +void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) +{ + const int mmu_idx = cpu_mmu_index(env, false); + /* MVO always processes one more byte than specified - maximum is 16 */ + const int len_dest = (l >> 4) + 1; + const int len_src = (l & 0xf) + 1; + uintptr_t ra = GETPC(); + uint8_t byte_dest, byte_src; + S390Access srca, desta; + int i, j; + + srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra); + + /* Handle rightmost byte */ + byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra); + byte_src = access_get_byte(env, &srca, len_src - 1, ra); + byte_dest = (byte_dest & 0x0f) | (byte_src << 4); + access_set_byte(env, &desta, len_dest - 1, byte_dest, ra); + + /* Process remaining bytes from right to left */ + for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) { + byte_dest = byte_src >> 4; + if (j >= 0) { + byte_src = access_get_byte(env, &srca, j, ra); + } else { + byte_src = 0; + } + byte_dest |= byte_src << 4; + access_set_byte(env, &desta, i, byte_dest, ra); + } +} + +/* move zones */ +void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; + uintptr_t ra = GETPC(); + int i; + + /* MVZ always copies one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) | + (access_get_byte(env, &srca2, i, ra) & 0x0f); + + access_set_byte(env, &desta, i, x, ra); + } +} + +/* compare unsigned byte arrays */ +static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1, + uint64_t s2, uintptr_t ra) +{ + uint32_t i; + uint32_t cc = 0; + + HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n", + __func__, l, s1, s2); + + for (i = 0; i <= l; i++) { + uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra); + uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra); + HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y); + if (x < y) { + cc = 1; + break; + } else if (x > y) { + cc = 2; + break; + } + } + + HELPER_LOG("\n"); + return cc; +} + +uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2) +{ + return do_helper_clc(env, l, s1, s2, GETPC()); +} + +/* compare logical under mask */ +uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask, + uint64_t addr) +{ + uintptr_t ra = GETPC(); + uint32_t cc = 0; + + HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1, + mask, addr); + + while (mask) { + if (mask & 8) { + uint8_t d = cpu_ldub_data_ra(env, addr, ra); + uint8_t r = extract32(r1, 24, 8); + HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d, + addr); + if (r < d) { + cc = 1; + break; + } else if (r > d) { + cc = 2; + break; + } + addr++; + } + mask = (mask << 1) & 0xf; + r1 <<= 8; + } + + HELPER_LOG("\n"); + return cc; +} + +static inline uint64_t get_address(CPUS390XState *env, int reg) +{ + return wrap_address(env, env->regs[reg]); +} + +/* + * Store the address to the given register, zeroing out unused leftmost + * bits in bit positions 32-63 (24-bit and 31-bit mode only). 
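+ * Sketch: in 24-bit mode, storing 0x12345678 leaves 0x00345678 in bit
+ * positions 32-63 of the register and keeps bit positions 0-31
+ * untouched.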
+ */ +static inline void set_address_zero(CPUS390XState *env, int reg, + uint64_t address) +{ + if (env->psw.mask & PSW_MASK_64) { + env->regs[reg] = address; + } else { + if (!(env->psw.mask & PSW_MASK_32)) { + address &= 0x00ffffff; + } else { + address &= 0x7fffffff; + } + env->regs[reg] = deposit64(env->regs[reg], 0, 32, address); + } +} + +static inline void set_address(CPUS390XState *env, int reg, uint64_t address) +{ + if (env->psw.mask & PSW_MASK_64) { + /* 64-Bit mode */ + env->regs[reg] = address; + } else { + if (!(env->psw.mask & PSW_MASK_32)) { + /* 24-Bit mode. According to the PoO it is implementation + dependent if bits 32-39 remain unchanged or are set to + zeros. Choose the former so that the function can also be + used for TRT. */ + env->regs[reg] = deposit64(env->regs[reg], 0, 24, address); + } else { + /* 31-Bit mode. According to the PoO it is implementation + dependent if bit 32 remains unchanged or is set to zero. + Choose the latter so that the function can also be used for + TRT. */ + address &= 0x7fffffff; + env->regs[reg] = deposit64(env->regs[reg], 0, 32, address); + } + } +} + +static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length) +{ + if (!(env->psw.mask & PSW_MASK_64)) { + return (uint32_t)length; + } + return length; +} + +static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length) +{ + if (!(env->psw.mask & PSW_MASK_64)) { + /* 24-Bit and 31-Bit mode */ + length &= 0x7fffffff; + } + return length; +} + +static inline uint64_t get_length(CPUS390XState *env, int reg) +{ + return wrap_length31(env, env->regs[reg]); +} + +static inline void set_length(CPUS390XState *env, int reg, uint64_t length) +{ + if (env->psw.mask & PSW_MASK_64) { + /* 64-Bit mode */ + env->regs[reg] = length; + } else { + /* 24-Bit and 31-Bit mode */ + env->regs[reg] = deposit64(env->regs[reg], 0, 32, length); + } +} + +/* search string (c is byte to search, r2 is string, r1 end of string) */ +void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + uintptr_t ra = GETPC(); + uint64_t end, str; + uint32_t len; + uint8_t v, c = env->regs[0]; + + /* Bits 32-55 must contain all 0. */ + if (env->regs[0] & 0xffffff00u) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + str = get_address(env, r2); + end = get_address(env, r1); + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. For now, let's cap at 8k. */ + for (len = 0; len < 0x2000; ++len) { + if (str + len == end) { + /* Character not found. R1 & R2 are unmodified. */ + env->cc_op = 2; + return; + } + v = cpu_ldub_data_ra(env, str + len, ra); + if (v == c) { + /* Character found. Set R1 to the location; R2 is unmodified. */ + env->cc_op = 1; + set_address(env, r1, str + len); + return; + } + } + + /* CPU-determined bytes processed. Advance R2 to next byte to process. */ + env->cc_op = 3; + set_address(env, r2, str + len); +} + +void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + uintptr_t ra = GETPC(); + uint32_t len; + uint16_t v, c = env->regs[0]; + uint64_t end, str, adj_end; + + /* Bits 32-47 of R0 must be zero. */ + if (env->regs[0] & 0xffff0000u) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + str = get_address(env, r2); + end = get_address(env, r1); + + /* If the LSB of the two addresses differ, use one extra byte. */ + adj_end = end + ((str ^ end) & 1); + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. 
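+ Returning cc 3 makes the program re-execute the instruction, so any
+ cap on the per-invocation work is architecturally fine.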
For now, let's cap at 8k. */ + for (len = 0; len < 0x2000; len += 2) { + if (str + len == adj_end) { + /* End of input found. */ + env->cc_op = 2; + return; + } + v = cpu_lduw_data_ra(env, str + len, ra); + if (v == c) { + /* Character found. Set R1 to the location; R2 is unmodified. */ + env->cc_op = 1; + set_address(env, r1, str + len); + return; + } + } + + /* CPU-determined bytes processed. Advance R2 to next byte to process. */ + env->cc_op = 3; + set_address(env, r2, str + len); +} + +/* unsigned string compare (c is string terminator) */ +uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2) +{ + uintptr_t ra = GETPC(); + uint32_t len; + + c = c & 0xff; + s1 = wrap_address(env, s1); + s2 = wrap_address(env, s2); + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. For now, let's cap at 8k. */ + for (len = 0; len < 0x2000; ++len) { + uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra); + uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra); + if (v1 == v2) { + if (v1 == c) { + /* Equal. CC=0, and don't advance the registers. */ + env->cc_op = 0; + env->retxl = s2; + return s1; + } + } else { + /* Unequal. CC={1,2}, and advance the registers. Note that + the terminator need not be zero, but the string that contains + the terminator is by definition "low". */ + env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2); + env->retxl = s2 + len; + return s1 + len; + } + } + + /* CPU-determined bytes equal; advance the registers. */ + env->cc_op = 3; + env->retxl = s2 + len; + return s1 + len; +} + +/* move page */ +uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2) +{ + const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK; + const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK; + const int mmu_idx = cpu_mmu_index(env, false); + const bool f = extract64(r0, 11, 1); + const bool s = extract64(r0, 10, 1); + const bool cco = extract64(r0, 8, 1); + uintptr_t ra = GETPC(); + S390Access srca, desta; + int exc; + + if ((f && s) || extract64(r0, 12, 4)) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC()); + } + + /* + * We always manually handle exceptions such that we can properly store + * r1/r2 to the lowcore on page-translation exceptions. 
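+ * With the condition-code option set, an inaccessible operand instead
+ * completes with cc 2 (source) or cc 1 (destination), as coded below,
+ * so the caller can resolve the fault itself and retry.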
+ * + * TODO: Access key handling + */ + exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE, + MMU_DATA_LOAD, mmu_idx, ra); + if (exc) { + if (cco) { + return 2; + } + goto inject_exc; + } + exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE, + MMU_DATA_STORE, mmu_idx, ra); + if (exc) { + if (cco && exc != PGM_PROTECTION) { + return 1; + } + goto inject_exc; + } + access_memmove(env, &desta, &srca, ra); + return 0; /* data moved */ +inject_exc: +#if !defined(CONFIG_USER_ONLY) + if (exc != PGM_ADDRESSING) { + stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code), + env->tlb_fill_tec); + } + if (exc == PGM_PAGE_TRANS) { + stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id), + r1 << 4 | r2); + } +#endif + tcg_s390_program_interrupt(env, exc, ra); +} + +/* string copy */ +uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + const int mmu_idx = cpu_mmu_index(env, false); + const uint64_t d = get_address(env, r1); + const uint64_t s = get_address(env, r2); + const uint8_t c = env->regs[0]; + const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK)); + S390Access srca, desta; + uintptr_t ra = GETPC(); + int i; + + if (env->regs[0] & 0xffffff00ull) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + /* + * Our access should not exceed single pages, as we must not report access + * exceptions exceeding the actually copied range (which we don't know at + * this point). We might over-indicate watchpoints within the pages + * (if we ever care, we have to limit processing to a single byte). + */ + srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < len; i++) { + const uint8_t v = access_get_byte(env, &srca, i, ra); + + access_set_byte(env, &desta, i, v, ra); + if (v == c) { + set_address_zero(env, r1, d + i); + return 1; + } + } + set_address_zero(env, r1, d + len); + set_address_zero(env, r2, s + len); + return 3; +} + +/* load access registers r1 to r3 from memory at a2 */ +void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) +{ + uintptr_t ra = GETPC(); + int i; + + if (a2 & 0x3) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + for (i = r1;; i = (i + 1) % 16) { + env->aregs[i] = cpu_ldl_data_ra(env, a2, ra); + a2 += 4; + + if (i == r3) { + break; + } + } +} + +/* store access registers r1 to r3 in memory at a2 */ +void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) +{ + uintptr_t ra = GETPC(); + int i; + + if (a2 & 0x3) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + for (i = r1;; i = (i + 1) % 16) { + cpu_stl_data_ra(env, a2, env->aregs[i], ra); + a2 += 4; + + if (i == r3) { + break; + } + } +} + +/* move long helper */ +static inline uint32_t do_mvcl(CPUS390XState *env, + uint64_t *dest, uint64_t *destlen, + uint64_t *src, uint64_t *srclen, + uint16_t pad, int wordsize, uintptr_t ra) +{ + const int mmu_idx = cpu_mmu_index(env, false); + int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK)); + S390Access srca, desta; + int i, cc; + + if (*destlen == *srclen) { + cc = 0; + } else if (*destlen < *srclen) { + cc = 1; + } else { + cc = 2; + } + + if (!*destlen) { + return cc; + } + + /* + * Only perform one type of operation (move/pad) at a time. + * Stay within single pages. 
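+ * Illustrative padding example: for MVCLU with pad 0xaabb, the parity
+ * of the remaining destination length selects the byte stored below,
+ * yielding ... 0xaa 0xbb 0xaa 0xbb so the destination ends with the
+ * complete two-byte pad character.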
+ */ + if (*srclen) { + /* Copy the src array */ + len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len); + *destlen -= len; + *srclen -= len; + srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra); + access_memmove(env, &desta, &srca, ra); + *src = wrap_address(env, *src + len); + *dest = wrap_address(env, *dest + len); + } else if (wordsize == 1) { + /* Pad the remaining area */ + *destlen -= len; + desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra); + access_memset(env, &desta, pad, ra); + *dest = wrap_address(env, *dest + len); + } else { + desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra); + + /* The remaining length selects the padding byte. */ + for (i = 0; i < len; (*destlen)--, i++) { + if (*destlen & 1) { + access_set_byte(env, &desta, i, pad, ra); + } else { + access_set_byte(env, &desta, i, pad >> 8, ra); + } + } + *dest = wrap_address(env, *dest + len); + } + + return *destlen ? 3 : cc; +} + +/* move long */ +uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + const int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + uint64_t destlen = env->regs[r1 + 1] & 0xffffff; + uint64_t dest = get_address(env, r1); + uint64_t srclen = env->regs[r2 + 1] & 0xffffff; + uint64_t src = get_address(env, r2); + uint8_t pad = env->regs[r2 + 1] >> 24; + CPUState *cs = env_cpu(env); + S390Access srca, desta; + uint32_t cc, cur_len; + + if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) { + cc = 3; + } else if (srclen == destlen) { + cc = 0; + } else if (destlen < srclen) { + cc = 1; + } else { + cc = 2; + } + + /* We might have to zero-out some bits even if there was no action. */ + if (unlikely(!destlen || cc == 3)) { + set_address_zero(env, r2, src); + set_address_zero(env, r1, dest); + return cc; + } else if (!srclen) { + set_address_zero(env, r2, src); + } + + /* + * Only perform one type of operation (move/pad) in one step. + * Stay within single pages. + */ + while (destlen) { + cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK)); + if (!srclen) { + desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx, + ra); + access_memset(env, &desta, pad, ra); + } else { + cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len); + + srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx, + ra); + desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx, + ra); + access_memmove(env, &desta, &srca, ra); + src = wrap_address(env, src + cur_len); + srclen -= cur_len; + env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen); + set_address_zero(env, r2, src); + } + dest = wrap_address(env, dest + cur_len); + destlen -= cur_len; + env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen); + set_address_zero(env, r1, dest); + + /* + * MVCL is interruptible. Return to the main loop if requested after + * writing back all state to registers. If no interrupt will get + * injected, we'll end up back in this handler and continue processing + * the remaining parts. 
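+ * (Sketch of the restart: the lengths and addresses were already
+ * written back to r1/r1+1 and r2/r2+1 above, so re-executing MVCL
+ * after cpu_loop_exit_restore() resumes exactly where we left off.)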
+ */ + if (destlen && unlikely(cpu_loop_exit_requested(cs))) { + cpu_loop_exit_restore(cs, ra); + } + } + return cc; +} + +/* move long extended */ +uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2, + uint32_t r3) +{ + uintptr_t ra = GETPC(); + uint64_t destlen = get_length(env, r1 + 1); + uint64_t dest = get_address(env, r1); + uint64_t srclen = get_length(env, r3 + 1); + uint64_t src = get_address(env, r3); + uint8_t pad = a2; + uint32_t cc; + + cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra); + + set_length(env, r1 + 1, destlen); + set_length(env, r3 + 1, srclen); + set_address(env, r1, dest); + set_address(env, r3, src); + + return cc; +} + +/* move long unicode */ +uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2, + uint32_t r3) +{ + uintptr_t ra = GETPC(); + uint64_t destlen = get_length(env, r1 + 1); + uint64_t dest = get_address(env, r1); + uint64_t srclen = get_length(env, r3 + 1); + uint64_t src = get_address(env, r3); + uint16_t pad = a2; + uint32_t cc; + + cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra); + + set_length(env, r1 + 1, destlen); + set_length(env, r3 + 1, srclen); + set_address(env, r1, dest); + set_address(env, r3, src); + + return cc; +} + +/* compare logical long helper */ +static inline uint32_t do_clcl(CPUS390XState *env, + uint64_t *src1, uint64_t *src1len, + uint64_t *src3, uint64_t *src3len, + uint16_t pad, uint64_t limit, + int wordsize, uintptr_t ra) +{ + uint64_t len = MAX(*src1len, *src3len); + uint32_t cc = 0; + + check_alignment(env, *src1len | *src3len, wordsize, ra); + + if (!len) { + return cc; + } + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. */ + if (len > limit) { + len = limit; + cc = 3; + } + + for (; len; len -= wordsize) { + uint16_t v1 = pad; + uint16_t v3 = pad; + + if (*src1len) { + v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra); + } + if (*src3len) { + v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra); + } + + if (v1 != v3) { + cc = (v1 < v3) ? 
1 : 2; + break; + } + + if (*src1len) { + *src1 += wordsize; + *src1len -= wordsize; + } + if (*src3len) { + *src3 += wordsize; + *src3len -= wordsize; + } + } + + return cc; +} + + +/* compare logical long */ +uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + uintptr_t ra = GETPC(); + uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24); + uint64_t src1 = get_address(env, r1); + uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24); + uint64_t src3 = get_address(env, r2); + uint8_t pad = env->regs[r2 + 1] >> 24; + uint32_t cc; + + cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra); + + env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len); + env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len); + set_address(env, r1, src1); + set_address(env, r2, src3); + + return cc; +} + +/* compare logical long extended memcompare insn with padding */ +uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2, + uint32_t r3) +{ + uintptr_t ra = GETPC(); + uint64_t src1len = get_length(env, r1 + 1); + uint64_t src1 = get_address(env, r1); + uint64_t src3len = get_length(env, r3 + 1); + uint64_t src3 = get_address(env, r3); + uint8_t pad = a2; + uint32_t cc; + + cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra); + + set_length(env, r1 + 1, src1len); + set_length(env, r3 + 1, src3len); + set_address(env, r1, src1); + set_address(env, r3, src3); + + return cc; +} + +/* compare logical long unicode memcompare insn with padding */ +uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2, + uint32_t r3) +{ + uintptr_t ra = GETPC(); + uint64_t src1len = get_length(env, r1 + 1); + uint64_t src1 = get_address(env, r1); + uint64_t src3len = get_length(env, r3 + 1); + uint64_t src3 = get_address(env, r3); + uint16_t pad = a2; + uint32_t cc = 0; + + cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra); + + set_length(env, r1 + 1, src1len); + set_length(env, r3 + 1, src3len); + set_address(env, r1, src1); + set_address(env, r3, src3); + + return cc; +} + +/* checksum */ +uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1, + uint64_t src, uint64_t src_len) +{ + uintptr_t ra = GETPC(); + uint64_t max_len, len; + uint64_t cksm = (uint32_t)r1; + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. For now, let's cap at 8k. */ + max_len = (src_len > 0x2000 ? 0x2000 : src_len); + + /* Process full words as available. */ + for (len = 0; len + 4 <= max_len; len += 4, src += 4) { + cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra); + } + + switch (max_len - len) { + case 1: + cksm += cpu_ldub_data_ra(env, src, ra) << 24; + len += 1; + break; + case 2: + cksm += cpu_lduw_data_ra(env, src, ra) << 16; + len += 2; + break; + case 3: + cksm += cpu_lduw_data_ra(env, src, ra) << 16; + cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8; + len += 3; + break; + } + + /* Fold the carry from the checksum. Note that we can see carry-out + during folding more than once (but probably not more than twice). */ + while (cksm > 0xffffffffull) { + cksm = (uint32_t)cksm + (cksm >> 32); + } + + /* Indicate whether or not we've processed everything. */ + env->cc_op = (len == src_len ? 0 : 3); + + /* Return both cksm and processed length. 
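+ (The checksum travels via env->retxl and the byte count via the
+ return value; the translator is then expected to update r1 and the
+ r2 register pair from them.)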
*/ + env->retxl = cksm; + return len; +} + +void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src) +{ + uintptr_t ra = GETPC(); + int len_dest = len >> 4; + int len_src = len & 0xf; + uint8_t b; + + dest += len_dest; + src += len_src; + + /* last byte is special, it only flips the nibbles */ + b = cpu_ldub_data_ra(env, src, ra); + cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra); + src--; + len_src--; + + /* now pack every value */ + while (len_dest > 0) { + b = 0; + + if (len_src >= 0) { + b = cpu_ldub_data_ra(env, src, ra) & 0x0f; + src--; + len_src--; + } + if (len_src >= 0) { + b |= cpu_ldub_data_ra(env, src, ra) << 4; + src--; + len_src--; + } + + len_dest--; + dest--; + cpu_stb_data_ra(env, dest, b, ra); + } +} + +static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src, + uint32_t srclen, int ssize, uintptr_t ra) +{ + int i; + /* The destination operand is always 16 bytes long. */ + const int destlen = 16; + + /* The operands are processed from right to left. */ + src += srclen - 1; + dest += destlen - 1; + + for (i = 0; i < destlen; i++) { + uint8_t b = 0; + + /* Start with a positive sign */ + if (i == 0) { + b = 0xc; + } else if (srclen > ssize) { + b = cpu_ldub_data_ra(env, src, ra) & 0x0f; + src -= ssize; + srclen -= ssize; + } + + if (srclen > ssize) { + b |= cpu_ldub_data_ra(env, src, ra) << 4; + src -= ssize; + srclen -= ssize; + } + + cpu_stb_data_ra(env, dest, b, ra); + dest--; + } +} + + +void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src, + uint32_t srclen) +{ + do_pkau(env, dest, src, srclen, 1, GETPC()); +} + +void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src, + uint32_t srclen) +{ + do_pkau(env, dest, src, srclen, 2, GETPC()); +} + +void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest, + uint64_t src) +{ + uintptr_t ra = GETPC(); + int len_dest = len >> 4; + int len_src = len & 0xf; + uint8_t b; + int second_nibble = 0; + + dest += len_dest; + src += len_src; + + /* last byte is special, it only flips the nibbles */ + b = cpu_ldub_data_ra(env, src, ra); + cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra); + src--; + len_src--; + + /* now pad every nibble with 0xf0 */ + + while (len_dest > 0) { + uint8_t cur_byte = 0; + + if (len_src > 0) { + cur_byte = cpu_ldub_data_ra(env, src, ra); + } + + len_dest--; + dest--; + + /* only advance one nibble at a time */ + if (second_nibble) { + cur_byte >>= 4; + len_src--; + src--; + } + second_nibble = !second_nibble; + + /* digit */ + cur_byte = (cur_byte & 0xf); + /* zone bits */ + cur_byte |= 0xf0; + + cpu_stb_data_ra(env, dest, cur_byte, ra); + } +} + +static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest, + uint32_t destlen, int dsize, uint64_t src, + uintptr_t ra) +{ + int i; + uint32_t cc; + uint8_t b; + /* The source operand is always 16 bytes long. */ + const int srclen = 16; + + /* The operands are processed from right to left. */ + src += srclen - 1; + dest += destlen - dsize; + + /* Check for the sign. */ + b = cpu_ldub_data_ra(env, src, ra); + src--; + switch (b & 0xf) { + case 0xa: + case 0xc: + case 0xe ... 0xf: + cc = 0; /* plus */ + break; + case 0xb: + case 0xd: + cc = 1; /* minus */ + break; + default: + case 0x0 ... 0x9: + cc = 3; /* invalid */ + break; + } + + /* Now pad every nibble with 0x30, advancing one nibble at a time. */ + for (i = 0; i < destlen; i += dsize) { + if (i == (31 * dsize)) { + /* If length is 32/64 bytes, the leftmost byte is 0. 
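+ (The 16-byte source holds only 31 digits plus the sign, so e.g.
+ UNPKA with a 32-byte first operand stores 0x30 in that byte.)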
*/ + b = 0; + } else if (i % (2 * dsize)) { + b = cpu_ldub_data_ra(env, src, ra); + src--; + } else { + b >>= 4; + } + cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra); + dest -= dsize; + } + + return cc; +} + +uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen, + uint64_t src) +{ + return do_unpkau(env, dest, destlen, 1, src, GETPC()); +} + +uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen, + uint64_t src) +{ + return do_unpkau(env, dest, destlen, 2, src, GETPC()); +} + +uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen) +{ + uintptr_t ra = GETPC(); + uint32_t cc = 0; + int i; + + for (i = 0; i < destlen; i++) { + uint8_t b = cpu_ldub_data_ra(env, dest + i, ra); + /* digit */ + cc |= (b & 0xf0) > 0x90 ? 2 : 0; + + if (i == (destlen - 1)) { + /* sign */ + cc |= (b & 0xf) < 0xa ? 1 : 0; + } else { + /* digit */ + cc |= (b & 0xf) > 0x9 ? 2 : 0; + } + } + + return cc; +} + +static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array, + uint64_t trans, uintptr_t ra) +{ + uint32_t i; + + for (i = 0; i <= len; i++) { + uint8_t byte = cpu_ldub_data_ra(env, array + i, ra); + uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra); + cpu_stb_data_ra(env, array + i, new_byte, ra); + } + + return env->cc_op; +} + +void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array, + uint64_t trans) +{ + do_helper_tr(env, len, array, trans, GETPC()); +} + +uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array, + uint64_t len, uint64_t trans) +{ + uintptr_t ra = GETPC(); + uint8_t end = env->regs[0] & 0xff; + uint64_t l = len; + uint64_t i; + uint32_t cc = 0; + + if (!(env->psw.mask & PSW_MASK_64)) { + array &= 0x7fffffff; + l = (uint32_t)l; + } + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. For now, let's cap at 8k. */ + if (l > 0x2000) { + l = 0x2000; + cc = 3; + } + + for (i = 0; i < l; i++) { + uint8_t byte, new_byte; + + byte = cpu_ldub_data_ra(env, array + i, ra); + + if (byte == end) { + cc = 1; + break; + } + + new_byte = cpu_ldub_data_ra(env, trans + byte, ra); + cpu_stb_data_ra(env, array + i, new_byte, ra); + } + + env->cc_op = cc; + env->retxl = len - i; + return array + i; +} + +static inline uint32_t do_helper_trt(CPUS390XState *env, int len, + uint64_t array, uint64_t trans, + int inc, uintptr_t ra) +{ + int i; + + for (i = 0; i <= len; i++) { + uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra); + uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra); + + if (sbyte != 0) { + set_address(env, 1, array + i * inc); + env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte); + return (i == len) ? 
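+                   /* a hit on the final byte yields cc 2, earlier cc 1 */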
2 : 1; + } + } + + return 0; +} + +static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len, + uint64_t array, uint64_t trans, + uintptr_t ra) +{ + return do_helper_trt(env, len, array, trans, 1, ra); +} + +uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array, + uint64_t trans) +{ + return do_helper_trt(env, len, array, trans, 1, GETPC()); +} + +static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len, + uint64_t array, uint64_t trans, + uintptr_t ra) +{ + return do_helper_trt(env, len, array, trans, -1, ra); +} + +uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array, + uint64_t trans) +{ + return do_helper_trt(env, len, array, trans, -1, GETPC()); +} + +/* Translate one/two to one/two */ +uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2, + uint32_t tst, uint32_t sizes) +{ + uintptr_t ra = GETPC(); + int dsize = (sizes & 1) ? 1 : 2; + int ssize = (sizes & 2) ? 1 : 2; + uint64_t tbl = get_address(env, 1); + uint64_t dst = get_address(env, r1); + uint64_t len = get_length(env, r1 + 1); + uint64_t src = get_address(env, r2); + uint32_t cc = 3; + int i; + + /* The lower address bits of TBL are ignored. For TROO, TROT, it's + the low 3 bits (double-word aligned). For TRTO, TRTT, it's either + the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH). */ + if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) { + tbl &= -4096; + } else { + tbl &= -8; + } + + check_alignment(env, len, ssize, ra); + + /* Lest we fail to service interrupts in a timely manner, */ + /* limit the amount of work we're willing to do. */ + for (i = 0; i < 0x2000; i++) { + uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra); + uint64_t tble = tbl + (sval * dsize); + uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra); + if (dval == tst) { + cc = 1; + break; + } + cpu_stsize_data_ra(env, dst, dval, dsize, ra); + + len -= ssize; + src += ssize; + dst += dsize; + + if (len == 0) { + cc = 0; + break; + } + } + + set_address(env, r1, dst); + set_length(env, r1 + 1, len); + set_address(env, r2, src); + + return cc; +} + +void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, + uint32_t r1, uint32_t r3) +{ + uintptr_t ra = GETPC(); + Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); + Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); + Int128 oldv; + uint64_t oldh, oldl; + bool fail; + + check_alignment(env, addr, 16, ra); + + oldh = cpu_ldq_data_ra(env, addr + 0, ra); + oldl = cpu_ldq_data_ra(env, addr + 8, ra); + + oldv = int128_make128(oldl, oldh); + fail = !int128_eq(oldv, cmpv); + if (fail) { + newv = oldv; + } + + cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); + cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); + + env->cc_op = fail; + env->regs[r1] = int128_gethi(oldv); + env->regs[r1 + 1] = int128_getlo(oldv); +} + +void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, + uint32_t r1, uint32_t r3) +{ + uintptr_t ra = GETPC(); + Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); + Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); + int mem_idx; + TCGMemOpIdx oi; + Int128 oldv; + bool fail; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + fail = !int128_eq(oldv, cmpv); + + env->cc_op = fail; + env->regs[r1] = int128_gethi(oldv); + env->regs[r1 + 1] = int128_getlo(oldv); +} + +static uint32_t do_csst(CPUS390XState 
*env, uint32_t r3, uint64_t a1, + uint64_t a2, bool parallel) +{ + uint32_t mem_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + uint32_t fc = extract32(env->regs[0], 0, 8); + uint32_t sc = extract32(env->regs[0], 8, 8); + uint64_t pl = get_address(env, 1) & -16; + uint64_t svh, svl; + uint32_t cc; + + /* Sanity check the function code and storage characteristic. */ + if (fc > 1 || sc > 3) { + if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) { + goto spec_exception; + } + if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) { + goto spec_exception; + } + } + + /* Sanity check the alignments. */ + if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) { + goto spec_exception; + } + + /* Sanity check writability of the store address. */ + probe_write(env, a2, 1 << sc, mem_idx, ra); + + /* + * Note that the compare-and-swap is atomic, and the store is atomic, + * but the complete operation is not. Therefore we do not need to + * assert serial context in order to implement this. That said, + * restart early if we can't support either operation that is supposed + * to be atomic. + */ + if (parallel) { + uint32_t max = 2; +#ifdef CONFIG_ATOMIC64 + max = 3; +#endif + if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) || + (HAVE_ATOMIC128 ? 0 : sc > max)) { + cpu_loop_exit_atomic(env_cpu(env), ra); + } + } + + /* All loads happen before all stores. For simplicity, load the entire + store value area from the parameter list. */ + svh = cpu_ldq_data_ra(env, pl + 16, ra); + svl = cpu_ldq_data_ra(env, pl + 24, ra); + + switch (fc) { + case 0: + { + uint32_t nv = cpu_ldl_data_ra(env, pl, ra); + uint32_t cv = env->regs[r3]; + uint32_t ov; + + if (parallel) { +#ifdef CONFIG_USER_ONLY + uint32_t *haddr = g2h(env_cpu(env), a1); + ov = qatomic_cmpxchg__nocheck(haddr, cv, nv); +#else + TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx); + ov = helper_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra); +#endif + } else { + ov = cpu_ldl_data_ra(env, a1, ra); + cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra); + } + cc = (ov != cv); + env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov); + } + break; + + case 1: + { + uint64_t nv = cpu_ldq_data_ra(env, pl, ra); + uint64_t cv = env->regs[r3]; + uint64_t ov; + + if (parallel) { +#ifdef CONFIG_ATOMIC64 +# ifdef CONFIG_USER_ONLY + uint64_t *haddr = g2h(env_cpu(env), a1); + ov = qatomic_cmpxchg__nocheck(haddr, cv, nv); +# else + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx); + ov = helper_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra); +# endif +#else + /* Note that we asserted !parallel above. */ + g_assert_not_reached(); +#endif + } else { + ov = cpu_ldq_data_ra(env, a1, ra); + cpu_stq_data_ra(env, a1, (ov == cv ? 
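+                                /* on match store new, else re-store old */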
nv : ov), ra); + } + cc = (ov != cv); + env->regs[r3] = ov; + } + break; + + case 2: + { + uint64_t nvh = cpu_ldq_data_ra(env, pl, ra); + uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra); + Int128 nv = int128_make128(nvl, nvh); + Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]); + Int128 ov; + + if (!parallel) { + uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra); + uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra); + + ov = int128_make128(ol, oh); + cc = !int128_eq(ov, cv); + if (cc) { + nv = ov; + } + + cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra); + cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra); + } else if (HAVE_CMPXCHG128) { + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); + cc = !int128_eq(ov, cv); + } else { + /* Note that we asserted !parallel above. */ + g_assert_not_reached(); + } + + env->regs[r3 + 0] = int128_gethi(ov); + env->regs[r3 + 1] = int128_getlo(ov); + } + break; + + default: + g_assert_not_reached(); + } + + /* Store only if the comparison succeeded. Note that above we use a pair + of 64-bit big-endian loads, so for sc < 3 we must extract the value + from the most-significant bits of svh. */ + if (cc == 0) { + switch (sc) { + case 0: + cpu_stb_data_ra(env, a2, svh >> 56, ra); + break; + case 1: + cpu_stw_data_ra(env, a2, svh >> 48, ra); + break; + case 2: + cpu_stl_data_ra(env, a2, svh >> 32, ra); + break; + case 3: + cpu_stq_data_ra(env, a2, svh, ra); + break; + case 4: + if (!parallel) { + cpu_stq_data_ra(env, a2 + 0, svh, ra); + cpu_stq_data_ra(env, a2 + 8, svl, ra); + } else if (HAVE_ATOMIC128) { + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + Int128 sv = int128_make128(svl, svh); + helper_atomic_sto_be_mmu(env, a2, sv, oi, ra); + } else { + /* Note that we asserted !parallel above. 
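+                   Without 16-byte atomic stores, the parallel path already
+                   bailed out to cpu_loop_exit_atomic() before touching
+                   memory, so this branch is unreachable.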
*/ + g_assert_not_reached(); + } + break; + default: + g_assert_not_reached(); + } + } + + return cc; + + spec_exception: + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); +} + +uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2) +{ + return do_csst(env, r3, a1, a2, false); +} + +uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1, + uint64_t a2) +{ + return do_csst(env, r3, a1, a2, true); +} + +#if !defined(CONFIG_USER_ONLY) +void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) +{ + uintptr_t ra = GETPC(); + bool PERchanged = false; + uint64_t src = a2; + uint32_t i; + + if (src & 0x7) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + for (i = r1;; i = (i + 1) % 16) { + uint64_t val = cpu_ldq_data_ra(env, src, ra); + if (env->cregs[i] != val && i >= 9 && i <= 11) { + PERchanged = true; + } + env->cregs[i] = val; + HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n", + i, src, val); + src += sizeof(uint64_t); + + if (i == r3) { + break; + } + } + + if (PERchanged && env->psw.mask & PSW_MASK_PER) { + s390_cpu_recompute_watchpoints(env_cpu(env)); + } + + tlb_flush(env_cpu(env)); +} + +void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) +{ + uintptr_t ra = GETPC(); + bool PERchanged = false; + uint64_t src = a2; + uint32_t i; + + if (src & 0x3) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + for (i = r1;; i = (i + 1) % 16) { + uint32_t val = cpu_ldl_data_ra(env, src, ra); + if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) { + PERchanged = true; + } + env->cregs[i] = deposit64(env->cregs[i], 0, 32, val); + HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val); + src += sizeof(uint32_t); + + if (i == r3) { + break; + } + } + + if (PERchanged && env->psw.mask & PSW_MASK_PER) { + s390_cpu_recompute_watchpoints(env_cpu(env)); + } + + tlb_flush(env_cpu(env)); +} + +void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) +{ + uintptr_t ra = GETPC(); + uint64_t dest = a2; + uint32_t i; + + if (dest & 0x7) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + for (i = r1;; i = (i + 1) % 16) { + cpu_stq_data_ra(env, dest, env->cregs[i], ra); + dest += sizeof(uint64_t); + + if (i == r3) { + break; + } + } +} + +void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) +{ + uintptr_t ra = GETPC(); + uint64_t dest = a2; + uint32_t i; + + if (dest & 0x3) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + for (i = r1;; i = (i + 1) % 16) { + cpu_stl_data_ra(env, dest, env->cregs[i], ra); + dest += sizeof(uint32_t); + + if (i == r3) { + break; + } + } +} + +uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr) +{ + uintptr_t ra = GETPC(); + int i; + + real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK; + + for (i = 0; i < TARGET_PAGE_SIZE; i += 8) { + cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra); + } + + return 0; +} + +uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2) +{ + S390CPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); + + /* + * TODO: we currently don't handle all access protection types + * (including access-list and key-controlled) as well as AR mode. 
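+     * Instead we approximate: probe a one-byte store and, if that raises
+     * protection, retry with a one-byte fetch; the cc values below fall
+     * out of those two probes.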
+ */ + if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) { + /* Fetching permitted; storing permitted */ + return 0; + } + + if (env->int_pgm_code == PGM_PROTECTION) { + /* retry if reading is possible */ + cs->exception_index = -1; + if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) { + /* Fetching permitted; storing not permitted */ + return 1; + } + } + + switch (env->int_pgm_code) { + case PGM_PROTECTION: + /* Fetching not permitted; storing not permitted */ + cs->exception_index = -1; + return 2; + case PGM_ADDRESSING: + case PGM_TRANS_SPEC: + /* exceptions forwarded to the guest */ + s390_cpu_virt_mem_handle_exc(cpu, GETPC()); + return 0; + } + + /* Translation not available */ + cs->exception_index = -1; + return 3; +} + +/* insert storage key extended */ +uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + static S390SKeysState *ss; + static S390SKeysClass *skeyclass; + uint64_t addr = wrap_address(env, r2); + uint8_t key; + + if (addr > ms->ram_size) { + return 0; + } + + if (unlikely(!ss)) { + ss = s390_get_skeys_device(); + skeyclass = S390_SKEYS_GET_CLASS(ss); + } + + if (skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key)) { + return 0; + } + return key; +} + +/* set storage key extended */ +void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + static S390SKeysState *ss; + static S390SKeysClass *skeyclass; + uint64_t addr = wrap_address(env, r2); + uint8_t key; + + if (addr > ms->ram_size) { + return; + } + + if (unlikely(!ss)) { + ss = s390_get_skeys_device(); + skeyclass = S390_SKEYS_GET_CLASS(ss); + } + + key = (uint8_t) r1; + skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key); + /* + * As we can only flush by virtual address and not all the entries + * that point to a physical address we have to flush the whole TLB. + */ + tlb_flush_all_cpus_synced(env_cpu(env)); +} + +/* reset reference bit extended */ +uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + static S390SKeysState *ss; + static S390SKeysClass *skeyclass; + uint8_t re, key; + + if (r2 > ms->ram_size) { + return 0; + } + + if (unlikely(!ss)) { + ss = s390_get_skeys_device(); + skeyclass = S390_SKEYS_GET_CLASS(ss); + } + + if (skeyclass->get_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) { + return 0; + } + + re = key & (SK_R | SK_C); + key &= ~SK_R; + + if (skeyclass->set_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) { + return 0; + } + /* + * As we can only flush by virtual address and not all the entries + * that point to a physical address we have to flush the whole TLB. 
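+     * Storage keys are per absolute page, while TLB entries are keyed by
+     * virtual address, so there is no precise invalidation to be had.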
+ */ + tlb_flush_all_cpus_synced(env_cpu(env)); + + /* + * cc + * + * 0 Reference bit zero; change bit zero + * 1 Reference bit zero; change bit one + * 2 Reference bit one; change bit zero + * 3 Reference bit one; change bit one + */ + + return re >> 1; +} + +uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) +{ + const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC; + S390Access srca, desta; + uintptr_t ra = GETPC(); + int cc = 0; + + HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n", + __func__, l, a1, a2); + + if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) || + psw_as == AS_HOME || psw_as == AS_ACCREG) { + s390_program_interrupt(env, PGM_SPECIAL_OP, ra); + } + + l = wrap_length32(env, l); + if (l > 256) { + /* max 256 */ + l = 256; + cc = 3; + } else if (!l) { + return cc; + } + + /* TODO: Access key handling */ + srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra); + desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra); + access_memmove(env, &desta, &srca, ra); + return cc; +} + +uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) +{ + const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC; + S390Access srca, desta; + uintptr_t ra = GETPC(); + int cc = 0; + + HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n", + __func__, l, a1, a2); + + if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) || + psw_as == AS_HOME || psw_as == AS_ACCREG) { + s390_program_interrupt(env, PGM_SPECIAL_OP, ra); + } + + l = wrap_length32(env, l); + if (l > 256) { + /* max 256 */ + l = 256; + cc = 3; + } else if (!l) { + return cc; + } + + /* TODO: Access key handling */ + srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra); + desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra); + access_memmove(env, &desta, &srca, ra); + return cc; +} + +void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4) +{ + CPUState *cs = env_cpu(env); + const uintptr_t ra = GETPC(); + uint64_t table, entry, raddr; + uint16_t entries, i, index = 0; + + if (r2 & 0xff000) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + if (!(r2 & 0x800)) { + /* invalidation-and-clearing operation */ + table = r1 & ASCE_ORIGIN; + entries = (r2 & 0x7ff) + 1; + + switch (r1 & ASCE_TYPE_MASK) { + case ASCE_TYPE_REGION1: + index = (r2 >> 53) & 0x7ff; + break; + case ASCE_TYPE_REGION2: + index = (r2 >> 42) & 0x7ff; + break; + case ASCE_TYPE_REGION3: + index = (r2 >> 31) & 0x7ff; + break; + case ASCE_TYPE_SEGMENT: + index = (r2 >> 20) & 0x7ff; + break; + } + for (i = 0; i < entries; i++) { + /* addresses are not wrapped in 24/31bit mode but table index is */ + raddr = table + ((index + i) & 0x7ff) * sizeof(entry); + entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra); + if (!(entry & REGION_ENTRY_I)) { + /* we are allowed to not store if already invalid */ + entry |= REGION_ENTRY_I; + cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra); + } + } + } + + /* We simply flush the complete tlb, therefore we can ignore r3. 
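+       Only the local-clearing bit of m4 matters below: when set, a
+       CPU-local flush suffices; otherwise all CPUs flush synchronously.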
*/ + if (m4 & 1) { + tlb_flush(cs); + } else { + tlb_flush_all_cpus_synced(cs); + } +} + +/* invalidate pte */ +void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr, + uint32_t m4) +{ + CPUState *cs = env_cpu(env); + const uintptr_t ra = GETPC(); + uint64_t page = vaddr & TARGET_PAGE_MASK; + uint64_t pte_addr, pte; + + /* Compute the page table entry address */ + pte_addr = (pto & SEGMENT_ENTRY_ORIGIN); + pte_addr += VADDR_PAGE_TX(vaddr) * 8; + + /* Mark the page table entry as invalid */ + pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra); + pte |= PAGE_ENTRY_I; + cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra); + + /* XXX we exploit the fact that Linux passes the exact virtual + address here - it's not obliged to! */ + if (m4 & 1) { + if (vaddr & ~VADDR_PAGE_TX_MASK) { + tlb_flush_page(cs, page); + /* XXX 31-bit hack */ + tlb_flush_page(cs, page ^ 0x80000000); + } else { + /* looks like we don't have a valid virtual address */ + tlb_flush(cs); + } + } else { + if (vaddr & ~VADDR_PAGE_TX_MASK) { + tlb_flush_page_all_cpus_synced(cs, page); + /* XXX 31-bit hack */ + tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000); + } else { + /* looks like we don't have a valid virtual address */ + tlb_flush_all_cpus_synced(cs); + } + } +} + +/* flush local tlb */ +void HELPER(ptlb)(CPUS390XState *env) +{ + tlb_flush(env_cpu(env)); +} + +/* flush global tlb */ +void HELPER(purge)(CPUS390XState *env) +{ + tlb_flush_all_cpus_synced(env_cpu(env)); +} + +/* load real address */ +uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr) +{ + uint64_t asc = env->psw.mask & PSW_MASK_ASC; + uint64_t ret, tec; + int flags, exc, cc; + + /* XXX incomplete - has more corner cases */ + if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) { + tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC()); + } + + exc = mmu_translate(env, addr, 0, asc, &ret, &flags, &tec); + if (exc) { + cc = 3; + ret = exc | 0x80000000; + } else { + cc = 0; + ret |= addr & ~TARGET_PAGE_MASK; + } + + env->cc_op = cc; + return ret; +} +#endif + +/* load pair from quadword */ +uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) +{ + uintptr_t ra = GETPC(); + uint64_t hi, lo; + + check_alignment(env, addr, 16, ra); + hi = cpu_ldq_data_ra(env, addr + 0, ra); + lo = cpu_ldq_data_ra(env, addr + 8, ra); + + env->retxl = lo; + return hi; +} + +uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr) +{ + uintptr_t ra = GETPC(); + uint64_t hi, lo; + int mem_idx; + TCGMemOpIdx oi; + Int128 v; + + assert(HAVE_ATOMIC128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); + hi = int128_gethi(v); + lo = int128_getlo(v); + + env->retxl = lo; + return hi; +} + +/* store pair to quadword */ +void HELPER(stpq)(CPUS390XState *env, uint64_t addr, + uint64_t low, uint64_t high) +{ + uintptr_t ra = GETPC(); + + check_alignment(env, addr, 16, ra); + cpu_stq_data_ra(env, addr + 0, high, ra); + cpu_stq_data_ra(env, addr + 8, low, ra); +} + +void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr, + uint64_t low, uint64_t high) +{ + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + Int128 v; + + assert(HAVE_ATOMIC128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + v = int128_make128(low, high); + helper_atomic_sto_be_mmu(env, addr, v, oi, ra); +} + +/* Execute instruction. This instruction executes an insn modified with + the contents of r1. 
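+   (Specifically, bits 56..63 of r1 are OR'ed into the instruction's
+   second byte, as done below.)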
It does not change the executed instruction in memory; + it does not change the program counter. + + Perform this by recording the modified instruction in env->ex_value. + This will be noticed by cpu_get_tb_cpu_state and thus tb translation. +*/ +void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr) +{ + uint64_t insn = cpu_lduw_code(env, addr); + uint8_t opc = insn >> 8; + + /* Or in the contents of R1[56:63]. */ + insn |= r1 & 0xff; + + /* Load the rest of the instruction. */ + insn <<= 48; + switch (get_ilen(opc)) { + case 2: + break; + case 4: + insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32; + break; + case 6: + insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16; + break; + default: + g_assert_not_reached(); + } + + /* The very most common cases can be sped up by avoiding a new TB. */ + if ((opc & 0xf0) == 0xd0) { + typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t, + uint64_t, uintptr_t); + static const dx_helper dx[16] = { + [0x0] = do_helper_trt_bkwd, + [0x2] = do_helper_mvc, + [0x4] = do_helper_nc, + [0x5] = do_helper_clc, + [0x6] = do_helper_oc, + [0x7] = do_helper_xc, + [0xc] = do_helper_tr, + [0xd] = do_helper_trt_fwd, + }; + dx_helper helper = dx[opc & 0xf]; + + if (helper) { + uint32_t l = extract64(insn, 48, 8); + uint32_t b1 = extract64(insn, 44, 4); + uint32_t d1 = extract64(insn, 32, 12); + uint32_t b2 = extract64(insn, 28, 4); + uint32_t d2 = extract64(insn, 16, 12); + uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1); + uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2); + + env->cc_op = helper(env, l, a1, a2, 0); + env->psw.addr += ilen; + return; + } + } else if (opc == 0x0a) { + env->int_svc_code = extract64(insn, 48, 8); + env->int_svc_ilen = ilen; + helper_exception(env, EXCP_SVC); + g_assert_not_reached(); + } + + /* Record the insn we want to execute as well as the ilen to use + during the execution of the target insn. This will also ensure + that ex_value is non-zero, which flags that we are in a state + that requires such execution. 
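+   Since the left-justified insn leaves the low bits clear and ilen is
+   only ever 2, 4 or 6, both pack losslessly into a single word.  A
+   hedged sketch of the inverse (illustrative only):
+
+       uint64_t insn = env->ex_value & ~0xfULL;   // left-justified insn
+       uint32_t ilen = env->ex_value & 0xf;       // 2, 4 or 6
+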
*/ + env->ex_value = insn | ilen; +} + +uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src, + uint64_t len) +{ + const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY; + const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC; + const uint64_t r0 = env->regs[0]; + const uintptr_t ra = GETPC(); + uint8_t dest_key, dest_as, dest_k, dest_a; + uint8_t src_key, src_as, src_k, src_a; + uint64_t val; + int cc = 0; + + HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n", + __func__, dest, src, len); + + if (!(env->psw.mask & PSW_MASK_DAT)) { + tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra); + } + + /* OAC (operand access control) for the first operand -> dest */ + val = (r0 & 0xffff0000ULL) >> 16; + dest_key = (val >> 12) & 0xf; + dest_as = (val >> 6) & 0x3; + dest_k = (val >> 1) & 0x1; + dest_a = val & 0x1; + + /* OAC (operand access control) for the second operand -> src */ + val = (r0 & 0x0000ffffULL); + src_key = (val >> 12) & 0xf; + src_as = (val >> 6) & 0x3; + src_k = (val >> 1) & 0x1; + src_a = val & 0x1; + + if (!dest_k) { + dest_key = psw_key; + } + if (!src_k) { + src_key = psw_key; + } + if (!dest_a) { + dest_as = psw_as; + } + if (!src_a) { + src_as = psw_as; + } + + if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) { + tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra); + } + if (!(env->cregs[0] & CR0_SECONDARY) && + (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) { + tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra); + } + if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) { + tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra); + } + + len = wrap_length32(env, len); + if (len > 4096) { + cc = 3; + len = 4096; + } + + /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */ + if (src_as == AS_ACCREG || dest_as == AS_ACCREG || + (env->psw.mask & PSW_MASK_PSTATE)) { + qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n", + __func__); + tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra); + } + + /* FIXME: Access using correct keys and AR-mode */ + if (len) { + S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD, + mmu_idx_from_as(src_as), ra); + S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE, + mmu_idx_from_as(dest_as), ra); + + access_memmove(env, &desta, &srca, ra); + } + + return cc; +} + +/* Decode a Unicode character. A return value < 0 indicates success, storing + the UTF-32 result into OCHAR and the input length into OLEN. A return + value >= 0 indicates failure, and the CC value to be returned. */ +typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr, + uint64_t ilen, bool enh_check, uintptr_t ra, + uint32_t *ochar, uint32_t *olen); + +/* Encode a Unicode character. A return value < 0 indicates success, storing + the bytes into ADDR and the output length into OLEN. A return value >= 0 + indicates failure, and the CC value to be returned. */ +typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr, + uint64_t ilen, uintptr_t ra, uint32_t c, + uint32_t *olen); + +static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen, + bool enh_check, uintptr_t ra, + uint32_t *ochar, uint32_t *olen) +{ + uint8_t s0, s1, s2, s3; + uint32_t c, l; + + if (ilen < 1) { + return 0; + } + s0 = cpu_ldub_data_ra(env, addr, ra); + if (s0 <= 0x7f) { + /* one byte character */ + l = 1; + c = s0; + } else if (s0 <= (enh_check ? 
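+                  /* 0xc0/0xc1 can only start overlong sequences */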
0xc1 : 0xbf)) { + /* invalid character */ + return 2; + } else if (s0 <= 0xdf) { + /* two byte character */ + l = 2; + if (ilen < 2) { + return 0; + } + s1 = cpu_ldub_data_ra(env, addr + 1, ra); + c = s0 & 0x1f; + c = (c << 6) | (s1 & 0x3f); + if (enh_check && (s1 & 0xc0) != 0x80) { + return 2; + } + } else if (s0 <= 0xef) { + /* three byte character */ + l = 3; + if (ilen < 3) { + return 0; + } + s1 = cpu_ldub_data_ra(env, addr + 1, ra); + s2 = cpu_ldub_data_ra(env, addr + 2, ra); + c = s0 & 0x0f; + c = (c << 6) | (s1 & 0x3f); + c = (c << 6) | (s2 & 0x3f); + /* Fold the byte-by-byte range descriptions in the PoO into + tests against the complete value. It disallows encodings + that could be smaller, and the UTF-16 surrogates. */ + if (enh_check + && ((s1 & 0xc0) != 0x80 + || (s2 & 0xc0) != 0x80 + || c < 0x1000 + || (c >= 0xd800 && c <= 0xdfff))) { + return 2; + } + } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) { + /* four byte character */ + l = 4; + if (ilen < 4) { + return 0; + } + s1 = cpu_ldub_data_ra(env, addr + 1, ra); + s2 = cpu_ldub_data_ra(env, addr + 2, ra); + s3 = cpu_ldub_data_ra(env, addr + 3, ra); + c = s0 & 0x07; + c = (c << 6) | (s1 & 0x3f); + c = (c << 6) | (s2 & 0x3f); + c = (c << 6) | (s3 & 0x3f); + /* See above. */ + if (enh_check + && ((s1 & 0xc0) != 0x80 + || (s2 & 0xc0) != 0x80 + || (s3 & 0xc0) != 0x80 + || c < 0x010000 + || c > 0x10ffff)) { + return 2; + } + } else { + /* invalid character */ + return 2; + } + + *ochar = c; + *olen = l; + return -1; +} + +static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen, + bool enh_check, uintptr_t ra, + uint32_t *ochar, uint32_t *olen) +{ + uint16_t s0, s1; + uint32_t c, l; + + if (ilen < 2) { + return 0; + } + s0 = cpu_lduw_data_ra(env, addr, ra); + if ((s0 & 0xfc00) != 0xd800) { + /* one word character */ + l = 2; + c = s0; + } else { + /* two word character */ + l = 4; + if (ilen < 4) { + return 0; + } + s1 = cpu_lduw_data_ra(env, addr + 2, ra); + c = extract32(s0, 6, 4) + 1; + c = (c << 6) | (s0 & 0x3f); + c = (c << 10) | (s1 & 0x3ff); + if (enh_check && (s1 & 0xfc00) != 0xdc00) { + /* invalid surrogate character */ + return 2; + } + } + + *ochar = c; + *olen = l; + return -1; +} + +static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen, + bool enh_check, uintptr_t ra, + uint32_t *ochar, uint32_t *olen) +{ + uint32_t c; + + if (ilen < 4) { + return 0; + } + c = cpu_ldl_data_ra(env, addr, ra); + if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) { + /* invalid unicode character */ + return 2; + } + + *ochar = c; + *olen = 4; + return -1; +} + +static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen, + uintptr_t ra, uint32_t c, uint32_t *olen) +{ + uint8_t d[4]; + uint32_t l, i; + + if (c <= 0x7f) { + /* one byte character */ + l = 1; + d[0] = c; + } else if (c <= 0x7ff) { + /* two byte character */ + l = 2; + d[1] = 0x80 | extract32(c, 0, 6); + d[0] = 0xc0 | extract32(c, 6, 5); + } else if (c <= 0xffff) { + /* three byte character */ + l = 3; + d[2] = 0x80 | extract32(c, 0, 6); + d[1] = 0x80 | extract32(c, 6, 6); + d[0] = 0xe0 | extract32(c, 12, 4); + } else { + /* four byte character */ + l = 4; + d[3] = 0x80 | extract32(c, 0, 6); + d[2] = 0x80 | extract32(c, 6, 6); + d[1] = 0x80 | extract32(c, 12, 6); + d[0] = 0xf0 | extract32(c, 18, 3); + } + + if (ilen < l) { + return 1; + } + for (i = 0; i < l; ++i) { + cpu_stb_data_ra(env, addr + i, d[i], ra); + } + + *olen = l; + return -1; +} + +static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen, + uintptr_t 
ra, uint32_t c, uint32_t *olen) +{ + uint16_t d0, d1; + + if (c <= 0xffff) { + /* one word character */ + if (ilen < 2) { + return 1; + } + cpu_stw_data_ra(env, addr, c, ra); + *olen = 2; + } else { + /* two word character */ + if (ilen < 4) { + return 1; + } + d1 = 0xdc00 | extract32(c, 0, 10); + d0 = 0xd800 | extract32(c, 10, 6); + d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1); + cpu_stw_data_ra(env, addr + 0, d0, ra); + cpu_stw_data_ra(env, addr + 2, d1, ra); + *olen = 4; + } + + return -1; +} + +static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen, + uintptr_t ra, uint32_t c, uint32_t *olen) +{ + if (ilen < 4) { + return 1; + } + cpu_stl_data_ra(env, addr, c, ra); + *olen = 4; + return -1; +} + +static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1, + uint32_t r2, uint32_t m3, uintptr_t ra, + decode_unicode_fn decode, + encode_unicode_fn encode) +{ + uint64_t dst = get_address(env, r1); + uint64_t dlen = get_length(env, r1 + 1); + uint64_t src = get_address(env, r2); + uint64_t slen = get_length(env, r2 + 1); + bool enh_check = m3 & 1; + int cc, i; + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. For now, let's cap at 256. */ + for (i = 0; i < 256; ++i) { + uint32_t c, ilen, olen; + + cc = decode(env, src, slen, enh_check, ra, &c, &ilen); + if (unlikely(cc >= 0)) { + break; + } + cc = encode(env, dst, dlen, ra, c, &olen); + if (unlikely(cc >= 0)) { + break; + } + + src += ilen; + slen -= ilen; + dst += olen; + dlen -= olen; + cc = 3; + } + + set_address(env, r1, dst); + set_length(env, r1 + 1, dlen); + set_address(env, r2, src); + set_length(env, r2 + 1, slen); + + return cc; +} + +uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) +{ + return convert_unicode(env, r1, r2, m3, GETPC(), + decode_utf8, encode_utf16); +} + +uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) +{ + return convert_unicode(env, r1, r2, m3, GETPC(), + decode_utf8, encode_utf32); +} + +uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) +{ + return convert_unicode(env, r1, r2, m3, GETPC(), + decode_utf16, encode_utf8); +} + +uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) +{ + return convert_unicode(env, r1, r2, m3, GETPC(), + decode_utf16, encode_utf32); +} + +uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) +{ + return convert_unicode(env, r1, r2, m3, GETPC(), + decode_utf32, encode_utf8); +} + +uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) +{ + return convert_unicode(env, r1, r2, m3, GETPC(), + decode_utf32, encode_utf16); +} + +void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len, + uintptr_t ra) +{ + /* test the actual access, not just any access to the page due to LAP */ + while (len) { + const uint64_t pagelen = -(addr | TARGET_PAGE_MASK); + const uint64_t curlen = MIN(pagelen, len); + + probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra); + addr = wrap_address(env, addr + curlen); + len -= curlen; + } +} + +void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len) +{ + probe_write_access(env, addr, len, GETPC()); +} diff --git a/target/s390x/tcg/meson.build b/target/s390x/tcg/meson.build new file mode 100644 index 0000000000..ee4e8fec77 --- /dev/null +++ b/target/s390x/tcg/meson.build @@ -0,0 +1,14 @@ +s390x_ss.add(when: 'CONFIG_TCG', if_true: files( + 'cc_helper.c', + 
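+  # insn-data.def, insn-format.def and translate_vx.c.inc are pulled in
+  # by translate.c via #include, so they are not compiled separately.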
'crypto_helper.c', + 'excp_helper.c', + 'fpu_helper.c', + 'int_helper.c', + 'mem_helper.c', + 'misc_helper.c', + 'translate.c', + 'vec_fpu_helper.c', + 'vec_helper.c', + 'vec_int_helper.c', + 'vec_string_helper.c', +)) diff --git a/target/s390x/tcg/misc_helper.c b/target/s390x/tcg/misc_helper.c new file mode 100644 index 0000000000..33e6999e15 --- /dev/null +++ b/target/s390x/tcg/misc_helper.c @@ -0,0 +1,785 @@ +/* + * S/390 misc helper routines + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2009 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "qemu/main-loop.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "exec/memory.h" +#include "qemu/host-utils.h" +#include "exec/helper-proto.h" +#include "qemu/timer.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "qapi/error.h" +#include "tcg_s390x.h" +#include "s390-tod.h" + +#if !defined(CONFIG_USER_ONLY) +#include "sysemu/cpus.h" +#include "sysemu/sysemu.h" +#include "hw/s390x/ebcdic.h" +#include "hw/s390x/s390-virtio-hcall.h" +#include "hw/s390x/sclp.h" +#include "hw/s390x/s390_flic.h" +#include "hw/s390x/ioinst.h" +#include "hw/s390x/s390-pci-inst.h" +#include "hw/boards.h" +#include "hw/s390x/tod.h" +#endif + +/* #define DEBUG_HELPER */ +#ifdef DEBUG_HELPER +#define HELPER_LOG(x...) qemu_log(x) +#else +#define HELPER_LOG(x...) +#endif + +/* Raise an exception statically from a TB. */ +void HELPER(exception)(CPUS390XState *env, uint32_t excp) +{ + CPUState *cs = env_cpu(env); + + HELPER_LOG("%s: exception %d\n", __func__, excp); + cs->exception_index = excp; + cpu_loop_exit(cs); +} + +/* Store CPU Timer (also used for EXTRACT CPU TIME) */ +uint64_t HELPER(stpt)(CPUS390XState *env) +{ +#if defined(CONFIG_USER_ONLY) + /* + * Fake a descending CPU timer. We could get negative values here, + * but we don't care as it is up to the OS when to process that + * interrupt and reset to > 0. 
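+     * Subtracting the ever-increasing host tick counter from UINT64_MAX
+     * gives exactly that: a value that only counts down.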
+ */ + return UINT64_MAX - (uint64_t)cpu_get_host_ticks(); +#else + return time2tod(env->cputm - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); +#endif +} + +/* Store Clock */ +uint64_t HELPER(stck)(CPUS390XState *env) +{ +#ifdef CONFIG_USER_ONLY + struct timespec ts; + uint64_t ns; + + clock_gettime(CLOCK_REALTIME, &ts); + ns = ts.tv_sec * NANOSECONDS_PER_SECOND + ts.tv_nsec; + + return TOD_UNIX_EPOCH + time2tod(ns); +#else + S390TODState *td = s390_get_todstate(); + S390TODClass *tdc = S390_TOD_GET_CLASS(td); + S390TOD tod; + + tdc->get(td, &tod, &error_abort); + return tod.low; +#endif +} + +#ifndef CONFIG_USER_ONLY +/* SCLP service call */ +uint32_t HELPER(servc)(CPUS390XState *env, uint64_t r1, uint64_t r2) +{ + qemu_mutex_lock_iothread(); + int r = sclp_service_call(env, r1, r2); + qemu_mutex_unlock_iothread(); + if (r < 0) { + tcg_s390_program_interrupt(env, -r, GETPC()); + } + return r; +} + +void HELPER(diag)(CPUS390XState *env, uint32_t r1, uint32_t r3, uint32_t num) +{ + uint64_t r; + + switch (num) { + case 0x500: + /* KVM hypercall */ + qemu_mutex_lock_iothread(); + r = s390_virtio_hypercall(env); + qemu_mutex_unlock_iothread(); + break; + case 0x44: + /* yield */ + r = 0; + break; + case 0x308: + /* ipl */ + qemu_mutex_lock_iothread(); + handle_diag_308(env, r1, r3, GETPC()); + qemu_mutex_unlock_iothread(); + r = 0; + break; + case 0x288: + /* time bomb (watchdog) */ + r = handle_diag_288(env, r1, r3); + break; + default: + r = -1; + break; + } + + if (r) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC()); + } +} + +/* Set Prefix */ +void HELPER(spx)(CPUS390XState *env, uint64_t a1) +{ + CPUState *cs = env_cpu(env); + uint32_t prefix = a1 & 0x7fffe000; + + env->psa = prefix; + HELPER_LOG("prefix: %#x\n", prefix); + tlb_flush_page(cs, 0); + tlb_flush_page(cs, TARGET_PAGE_SIZE); +} + +static void update_ckc_timer(CPUS390XState *env) +{ + S390TODState *td = s390_get_todstate(); + uint64_t time; + + /* stop the timer and remove pending CKC IRQs */ + timer_del(env->tod_timer); + g_assert(qemu_mutex_iothread_locked()); + env->pending_int &= ~INTERRUPT_EXT_CLOCK_COMPARATOR; + + /* the tod has to exceed the ckc, this can never happen if ckc is all 1's */ + if (env->ckc == -1ULL) { + return; + } + + /* difference between origins */ + time = env->ckc - td->base.low; + + /* nanoseconds */ + time = tod2time(time); + + timer_mod(env->tod_timer, time); +} + +/* Set Clock Comparator */ +void HELPER(sckc)(CPUS390XState *env, uint64_t ckc) +{ + env->ckc = ckc; + + qemu_mutex_lock_iothread(); + update_ckc_timer(env); + qemu_mutex_unlock_iothread(); +} + +void tcg_s390_tod_updated(CPUState *cs, run_on_cpu_data opaque) +{ + S390CPU *cpu = S390_CPU(cs); + + update_ckc_timer(&cpu->env); +} + +/* Set Clock */ +uint32_t HELPER(sck)(CPUS390XState *env, uint64_t tod_low) +{ + S390TODState *td = s390_get_todstate(); + S390TODClass *tdc = S390_TOD_GET_CLASS(td); + S390TOD tod = { + .high = 0, + .low = tod_low, + }; + + qemu_mutex_lock_iothread(); + tdc->set(td, &tod, &error_abort); + qemu_mutex_unlock_iothread(); + return 0; +} + +/* Set Tod Programmable Field */ +void HELPER(sckpf)(CPUS390XState *env, uint64_t r0) +{ + uint32_t val = r0; + + if (val & 0xffff0000) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC()); + } + env->todpr = val; +} + +/* Store Clock Comparator */ +uint64_t HELPER(stckc)(CPUS390XState *env) +{ + return env->ckc; +} + +/* Set CPU Timer */ +void HELPER(spt)(CPUS390XState *env, uint64_t time) +{ + if (time == -1ULL) { + return; + } + + /* nanoseconds */ + time = 
tod2time(time); + + env->cputm = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + time; + + timer_mod(env->cpu_timer, env->cputm); +} + +/* Store System Information */ +uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0, uint64_t r0, uint64_t r1) +{ + const uintptr_t ra = GETPC(); + const uint32_t sel1 = r0 & STSI_R0_SEL1_MASK; + const uint32_t sel2 = r1 & STSI_R1_SEL2_MASK; + const MachineState *ms = MACHINE(qdev_get_machine()); + uint16_t total_cpus = 0, conf_cpus = 0, reserved_cpus = 0; + S390CPU *cpu = env_archcpu(env); + SysIB sysib = { }; + int i, cc = 0; + + if ((r0 & STSI_R0_FC_MASK) > STSI_R0_FC_LEVEL_3) { + /* invalid function code: no other checks are performed */ + return 3; + } + + if ((r0 & STSI_R0_RESERVED_MASK) || (r1 & STSI_R1_RESERVED_MASK)) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + if ((r0 & STSI_R0_FC_MASK) == STSI_R0_FC_CURRENT) { + /* query the current level: no further checks are performed */ + env->regs[0] = STSI_R0_FC_LEVEL_3; + return 0; + } + + if (a0 & ~TARGET_PAGE_MASK) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + /* count the cpus and split them into configured and reserved ones */ + for (i = 0; i < ms->possible_cpus->len; i++) { + total_cpus++; + if (ms->possible_cpus->cpus[i].cpu) { + conf_cpus++; + } else { + reserved_cpus++; + } + } + + /* + * In theory, we could report Level 1 / Level 2 as current. However, + * the Linux kernel will detect this as running under LPAR and assume + * that we have a sclp linemode console (which is always present on + * LPAR, but not the default for QEMU), therefore not displaying boot + * messages and making booting a Linux kernel under TCG harder. + * + * For now we fake the same SMP configuration on all levels. + * + * TODO: We could later make the level configurable via the machine + * and change defaults (linemode console) based on machine type + * and accelerator. 
+ */ + switch (r0 & STSI_R0_FC_MASK) { + case STSI_R0_FC_LEVEL_1: + if ((sel1 == 1) && (sel2 == 1)) { + /* Basic Machine Configuration */ + char type[5] = {}; + + ebcdic_put(sysib.sysib_111.manuf, "QEMU ", 16); + /* same as machine type number in STORE CPU ID, but in EBCDIC */ + snprintf(type, ARRAY_SIZE(type), "%X", cpu->model->def->type); + ebcdic_put(sysib.sysib_111.type, type, 4); + /* model number (not stored in STORE CPU ID for z/Architecure) */ + ebcdic_put(sysib.sysib_111.model, "QEMU ", 16); + ebcdic_put(sysib.sysib_111.sequence, "QEMU ", 16); + ebcdic_put(sysib.sysib_111.plant, "QEMU", 4); + } else if ((sel1 == 2) && (sel2 == 1)) { + /* Basic Machine CPU */ + ebcdic_put(sysib.sysib_121.sequence, "QEMUQEMUQEMUQEMU", 16); + ebcdic_put(sysib.sysib_121.plant, "QEMU", 4); + sysib.sysib_121.cpu_addr = cpu_to_be16(env->core_id); + } else if ((sel1 == 2) && (sel2 == 2)) { + /* Basic Machine CPUs */ + sysib.sysib_122.capability = cpu_to_be32(0x443afc29); + sysib.sysib_122.total_cpus = cpu_to_be16(total_cpus); + sysib.sysib_122.conf_cpus = cpu_to_be16(conf_cpus); + sysib.sysib_122.reserved_cpus = cpu_to_be16(reserved_cpus); + } else { + cc = 3; + } + break; + case STSI_R0_FC_LEVEL_2: + if ((sel1 == 2) && (sel2 == 1)) { + /* LPAR CPU */ + ebcdic_put(sysib.sysib_221.sequence, "QEMUQEMUQEMUQEMU", 16); + ebcdic_put(sysib.sysib_221.plant, "QEMU", 4); + sysib.sysib_221.cpu_addr = cpu_to_be16(env->core_id); + } else if ((sel1 == 2) && (sel2 == 2)) { + /* LPAR CPUs */ + sysib.sysib_222.lcpuc = 0x80; /* dedicated */ + sysib.sysib_222.total_cpus = cpu_to_be16(total_cpus); + sysib.sysib_222.conf_cpus = cpu_to_be16(conf_cpus); + sysib.sysib_222.reserved_cpus = cpu_to_be16(reserved_cpus); + ebcdic_put(sysib.sysib_222.name, "QEMU ", 8); + sysib.sysib_222.caf = cpu_to_be32(1000); + sysib.sysib_222.dedicated_cpus = cpu_to_be16(conf_cpus); + } else { + cc = 3; + } + break; + case STSI_R0_FC_LEVEL_3: + if ((sel1 == 2) && (sel2 == 2)) { + /* VM CPUs */ + sysib.sysib_322.count = 1; + sysib.sysib_322.vm[0].total_cpus = cpu_to_be16(total_cpus); + sysib.sysib_322.vm[0].conf_cpus = cpu_to_be16(conf_cpus); + sysib.sysib_322.vm[0].reserved_cpus = cpu_to_be16(reserved_cpus); + sysib.sysib_322.vm[0].caf = cpu_to_be32(1000); + /* Linux kernel uses this to distinguish us from z/VM */ + ebcdic_put(sysib.sysib_322.vm[0].cpi, "KVM/Linux ", 16); + sysib.sysib_322.vm[0].ext_name_encoding = 2; /* UTF-8 */ + + /* If our VM has a name, use the real name */ + if (qemu_name) { + memset(sysib.sysib_322.vm[0].name, 0x40, + sizeof(sysib.sysib_322.vm[0].name)); + ebcdic_put(sysib.sysib_322.vm[0].name, qemu_name, + MIN(sizeof(sysib.sysib_322.vm[0].name), + strlen(qemu_name))); + strpadcpy((char *)sysib.sysib_322.ext_names[0], + sizeof(sysib.sysib_322.ext_names[0]), + qemu_name, '\0'); + + } else { + ebcdic_put(sysib.sysib_322.vm[0].name, "TCGguest", 8); + strcpy((char *)sysib.sysib_322.ext_names[0], "TCGguest"); + } + + /* add the uuid */ + memcpy(sysib.sysib_322.vm[0].uuid, &qemu_uuid, + sizeof(sysib.sysib_322.vm[0].uuid)); + } else { + cc = 3; + } + break; + } + + if (cc == 0) { + if (s390_cpu_virt_mem_write(cpu, a0, 0, &sysib, sizeof(sysib))) { + s390_cpu_virt_mem_handle_exc(cpu, ra); + } + } + + return cc; +} + +uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1, + uint32_t r3) +{ + int cc; + + /* TODO: needed to inject interrupts - push further down */ + qemu_mutex_lock_iothread(); + cc = handle_sigp(env, order_code & SIGP_ORDER_MASK, r1, r3); + qemu_mutex_unlock_iothread(); + + return cc; +} +#endif + 
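+/*
+ * All of the channel-I/O helpers below share one shape: take the big
+ * QEMU ("iothread") lock around the ioinst_handle_*() call, since
+ * subchannel emulation touches shared machine state.  A hedged sketch
+ * of that pattern (this wrapper is illustrative and not used here):
+ */
+#if 0
+static void do_ioinst_locked(S390CPU *cpu,
+                             void (*fn)(S390CPU *, uint64_t, uintptr_t),
+                             uint64_t r1, uintptr_t ra)
+{
+    qemu_mutex_lock_iothread();     /* serialize against board state */
+    fn(cpu, r1, ra);
+    qemu_mutex_unlock_iothread();
+}
+#endif
+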
+#ifndef CONFIG_USER_ONLY +void HELPER(xsch)(CPUS390XState *env, uint64_t r1) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_xsch(cpu, r1, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(csch)(CPUS390XState *env, uint64_t r1) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_csch(cpu, r1, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(hsch)(CPUS390XState *env, uint64_t r1) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_hsch(cpu, r1, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(msch)(CPUS390XState *env, uint64_t r1, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_msch(cpu, r1, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(rchp)(CPUS390XState *env, uint64_t r1) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_rchp(cpu, r1, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(rsch)(CPUS390XState *env, uint64_t r1) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_rsch(cpu, r1, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(sal)(CPUS390XState *env, uint64_t r1) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + ioinst_handle_sal(cpu, r1, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(schm)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + ioinst_handle_schm(cpu, r1, r2, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(ssch)(CPUS390XState *env, uint64_t r1, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_ssch(cpu, r1, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(stcrw)(CPUS390XState *env, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + ioinst_handle_stcrw(cpu, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(stsch)(CPUS390XState *env, uint64_t r1, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_stsch(cpu, r1, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} + +uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr) +{ + const uintptr_t ra = GETPC(); + S390CPU *cpu = env_archcpu(env); + QEMUS390FLICState *flic = s390_get_qemu_flic(s390_get_flic()); + QEMUS390FlicIO *io = NULL; + LowCore *lowcore; + + if (addr & 0x3) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + qemu_mutex_lock_iothread(); + io = qemu_s390_flic_dequeue_io(flic, env->cregs[6]); + if (!io) { + qemu_mutex_unlock_iothread(); + return 0; + } + + if (addr) { + struct { + uint16_t id; + uint16_t nr; + uint32_t parm; + } intc = { + .id = cpu_to_be16(io->id), + .nr = cpu_to_be16(io->nr), + .parm = cpu_to_be32(io->parm), + }; + + if (s390_cpu_virt_mem_write(cpu, addr, 0, &intc, sizeof(intc))) { + /* writing failed, reinject and properly clean up */ + s390_io_interrupt(io->id, io->nr, io->parm, io->word); + qemu_mutex_unlock_iothread(); + g_free(io); + s390_cpu_virt_mem_handle_exc(cpu, ra); + return 0; + } + } else { + /* no protection applies */ + lowcore = cpu_map_lowcore(env); + lowcore->subchannel_id = cpu_to_be16(io->id); + lowcore->subchannel_nr = cpu_to_be16(io->nr); + lowcore->io_int_parm = cpu_to_be32(io->parm); + lowcore->io_int_word = cpu_to_be32(io->word); + 
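+        /* unmapping commits the interruption code to guest memory */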
cpu_unmap_lowcore(lowcore); + } + + g_free(io); + qemu_mutex_unlock_iothread(); + return 1; +} + +void HELPER(tsch)(CPUS390XState *env, uint64_t r1, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_tsch(cpu, r1, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(chsc)(CPUS390XState *env, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_chsc(cpu, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} +#endif + +#ifndef CONFIG_USER_ONLY +void HELPER(per_check_exception)(CPUS390XState *env) +{ + if (env->per_perc_atmid) { + tcg_s390_program_interrupt(env, PGM_PER, GETPC()); + } +} + +/* Check if an address is within the PER starting address and the PER + ending address. The address range might loop. */ +static inline bool get_per_in_range(CPUS390XState *env, uint64_t addr) +{ + if (env->cregs[10] <= env->cregs[11]) { + return env->cregs[10] <= addr && addr <= env->cregs[11]; + } else { + return env->cregs[10] <= addr || addr <= env->cregs[11]; + } +} + +void HELPER(per_branch)(CPUS390XState *env, uint64_t from, uint64_t to) +{ + if ((env->cregs[9] & PER_CR9_EVENT_BRANCH)) { + if (!(env->cregs[9] & PER_CR9_CONTROL_BRANCH_ADDRESS) + || get_per_in_range(env, to)) { + env->per_address = from; + env->per_perc_atmid = PER_CODE_EVENT_BRANCH | get_per_atmid(env); + } + } +} + +void HELPER(per_ifetch)(CPUS390XState *env, uint64_t addr) +{ + if ((env->cregs[9] & PER_CR9_EVENT_IFETCH) && get_per_in_range(env, addr)) { + env->per_address = addr; + env->per_perc_atmid = PER_CODE_EVENT_IFETCH | get_per_atmid(env); + + /* If the instruction has to be nullified, trigger the + exception immediately. */ + if (env->cregs[9] & PER_CR9_EVENT_NULLIFICATION) { + CPUState *cs = env_cpu(env); + + env->per_perc_atmid |= PER_CODE_EVENT_NULLIFICATION; + env->int_pgm_code = PGM_PER; + env->int_pgm_ilen = get_ilen(cpu_ldub_code(env, addr)); + + cs->exception_index = EXCP_PGM; + cpu_loop_exit(cs); + } + } +} + +void HELPER(per_store_real)(CPUS390XState *env) +{ + if ((env->cregs[9] & PER_CR9_EVENT_STORE) && + (env->cregs[9] & PER_CR9_EVENT_STORE_REAL)) { + /* PSW is saved just before calling the helper. */ + env->per_address = env->psw.addr; + env->per_perc_atmid = PER_CODE_EVENT_STORE_REAL | get_per_atmid(env); + } +} +#endif + +static uint8_t stfl_bytes[2048]; +static unsigned int used_stfl_bytes; + +static void prepare_stfl(void) +{ + static bool initialized; + int i; + + /* racy, but we don't care, the same values are always written */ + if (initialized) { + return; + } + + s390_get_feat_block(S390_FEAT_TYPE_STFL, stfl_bytes); + for (i = 0; i < sizeof(stfl_bytes); i++) { + if (stfl_bytes[i]) { + used_stfl_bytes = i + 1; + } + } + initialized = true; +} + +#ifndef CONFIG_USER_ONLY +void HELPER(stfl)(CPUS390XState *env) +{ + LowCore *lowcore; + + lowcore = cpu_map_lowcore(env); + prepare_stfl(); + memcpy(&lowcore->stfl_fac_list, stfl_bytes, sizeof(lowcore->stfl_fac_list)); + cpu_unmap_lowcore(lowcore); +} +#endif + +uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr) +{ + const uintptr_t ra = GETPC(); + const int count_bytes = ((env->regs[0] & 0xff) + 1) * 8; + int max_bytes; + int i; + + if (addr & 0x7) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + prepare_stfl(); + max_bytes = ROUND_UP(used_stfl_bytes, 8); + + /* + * The PoP says that doublewords beyond the highest-numbered facility + * bit may or may not be stored. 
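+ * (Both behaviours are permitted; the bytes lie within the program's
+ * own operand, so either choice is architecturally sound.)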
However, existing hardware appears to + * not store the words, and existing software depend on that. + */ + for (i = 0; i < MIN(count_bytes, max_bytes); ++i) { + cpu_stb_data_ra(env, addr + i, stfl_bytes[i], ra); + } + + env->regs[0] = deposit64(env->regs[0], 0, 8, (max_bytes / 8) - 1); + return count_bytes >= max_bytes ? 0 : 3; +} + +#ifndef CONFIG_USER_ONLY +/* + * Note: we ignore any return code of the functions called for the pci + * instructions, as the only time they return !0 is when the stub is + * called, and in that case we didn't even offer the zpci facility. + * The only exception is SIC, where program checks need to be handled + * by the caller. + */ +void HELPER(clp)(CPUS390XState *env, uint32_t r2) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + clp_service_call(cpu, r2, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(pcilg)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + pcilg_service_call(cpu, r1, r2, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(pcistg)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + pcistg_service_call(cpu, r1, r2, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(stpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba, + uint32_t ar) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + stpcifc_service_call(cpu, r1, fiba, ar, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(sic)(CPUS390XState *env, uint64_t r1, uint64_t r3) +{ + int r; + + qemu_mutex_lock_iothread(); + r = css_do_sic(env, (r3 >> 27) & 0x7, r1 & 0xffff); + qemu_mutex_unlock_iothread(); + /* css_do_sic() may actually return a PGM_xxx value to inject */ + if (r) { + tcg_s390_program_interrupt(env, -r, GETPC()); + } +} + +void HELPER(rpcit)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + rpcit_service_call(cpu, r1, r2, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(pcistb)(CPUS390XState *env, uint32_t r1, uint32_t r3, + uint64_t gaddr, uint32_t ar) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + pcistb_service_call(cpu, r1, r3, gaddr, ar, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(mpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba, + uint32_t ar) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + mpcifc_service_call(cpu, r1, fiba, ar, GETPC()); + qemu_mutex_unlock_iothread(); +} +#endif diff --git a/target/s390x/tcg/s390-tod.h b/target/s390x/tcg/s390-tod.h new file mode 100644 index 0000000000..8b74d6a6d8 --- /dev/null +++ b/target/s390x/tcg/s390-tod.h @@ -0,0 +1,29 @@ +/* + * TOD (Time Of Day) clock + * + * Copyright 2018 Red Hat, Inc. + * Author(s): David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef TARGET_S390_TOD_H +#define TARGET_S390_TOD_H + +/* The value of the TOD clock for 1.1.1970. 
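+   The TOD epoch is 1900-01-01 and bit 51 ticks once per microsecond, so
+   with the 17 leap days between 1900 and 1970 the constant re-derives as
+
+       (2208988800ULL * 1000000) << 12  ==  0x7d91048bca000000
+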
*/ +#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL + +/* Converts ns to s390's clock format */ +static inline uint64_t time2tod(uint64_t ns) +{ + return (ns << 9) / 125 + (((ns & 0xff80000000000000ull) / 125) << 9); +} + +/* Converts s390's clock format to ns */ +static inline uint64_t tod2time(uint64_t t) +{ + return ((t >> 9) * 125) + (((t & 0x1ff) * 125) >> 9); +} + +#endif diff --git a/target/s390x/tcg/tcg_s390x.h b/target/s390x/tcg/tcg_s390x.h new file mode 100644 index 0000000000..2f54ccb027 --- /dev/null +++ b/target/s390x/tcg/tcg_s390x.h @@ -0,0 +1,24 @@ +/* + * QEMU TCG support -- s390x specific functions. + * + * Copyright 2018 Red Hat, Inc. + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef TCG_S390X_H +#define TCG_S390X_H + +void tcg_s390_tod_updated(CPUState *cs, run_on_cpu_data opaque); +void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, + uint32_t code, uintptr_t ra); +void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc, + uintptr_t ra); +void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc, + uintptr_t ra); + +#endif /* TCG_S390X_H */ diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c new file mode 100644 index 0000000000..92fa7656c2 --- /dev/null +++ b/target/s390x/tcg/translate.c @@ -0,0 +1,6672 @@ +/* + * S/390 translation + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2010 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* #define DEBUG_INLINE_BRANCHES */ +#define S390X_DEBUG_DISAS +/* #define S390X_DEBUG_DISAS_VERBOSE */ + +#ifdef S390X_DEBUG_DISAS_VERBOSE +# define LOG_DISAS(...) qemu_log(__VA_ARGS__) +#else +# define LOG_DISAS(...) do { } while (0) +#endif + +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "disas/disas.h" +#include "exec/exec-all.h" +#include "tcg/tcg-op.h" +#include "tcg/tcg-op-gvec.h" +#include "qemu/log.h" +#include "qemu/host-utils.h" +#include "exec/cpu_ldst.h" +#include "exec/gen-icount.h" +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" + +#include "exec/translator.h" +#include "exec/log.h" +#include "qemu/atomic128.h" + + +/* Information that (most) every instruction needs to manipulate. */ +typedef struct DisasContext DisasContext; +typedef struct DisasInsn DisasInsn; +typedef struct DisasFields DisasFields; + +/* + * Define a structure to hold the decoded fields. We'll store each inside + * an array indexed by an enum. In order to conserve memory, we'll arrange + * for fields that do not exist at the same time to overlap, thus the "C" + * for compact. For checking purposes there is an "O" for original index + * as well that will be applied to availability bitmaps. 
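+ * For example, r1, m1, b1, i1 and v1 all share compact slot 0 below,
+ * since no instruction format uses more than one of them at a time.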
+ */ + +enum DisasFieldIndexO { + FLD_O_r1, + FLD_O_r2, + FLD_O_r3, + FLD_O_m1, + FLD_O_m3, + FLD_O_m4, + FLD_O_m5, + FLD_O_m6, + FLD_O_b1, + FLD_O_b2, + FLD_O_b4, + FLD_O_d1, + FLD_O_d2, + FLD_O_d4, + FLD_O_x2, + FLD_O_l1, + FLD_O_l2, + FLD_O_i1, + FLD_O_i2, + FLD_O_i3, + FLD_O_i4, + FLD_O_i5, + FLD_O_v1, + FLD_O_v2, + FLD_O_v3, + FLD_O_v4, +}; + +enum DisasFieldIndexC { + FLD_C_r1 = 0, + FLD_C_m1 = 0, + FLD_C_b1 = 0, + FLD_C_i1 = 0, + FLD_C_v1 = 0, + + FLD_C_r2 = 1, + FLD_C_b2 = 1, + FLD_C_i2 = 1, + + FLD_C_r3 = 2, + FLD_C_m3 = 2, + FLD_C_i3 = 2, + FLD_C_v3 = 2, + + FLD_C_m4 = 3, + FLD_C_b4 = 3, + FLD_C_i4 = 3, + FLD_C_l1 = 3, + FLD_C_v4 = 3, + + FLD_C_i5 = 4, + FLD_C_d1 = 4, + FLD_C_m5 = 4, + + FLD_C_d2 = 5, + FLD_C_m6 = 5, + + FLD_C_d4 = 6, + FLD_C_x2 = 6, + FLD_C_l2 = 6, + FLD_C_v2 = 6, + + NUM_C_FIELD = 7 +}; + +struct DisasFields { + uint64_t raw_insn; + unsigned op:8; + unsigned op2:8; + unsigned presentC:16; + unsigned int presentO; + int c[NUM_C_FIELD]; +}; + +struct DisasContext { + DisasContextBase base; + const DisasInsn *insn; + DisasFields fields; + uint64_t ex_value; + /* + * During translate_one(), pc_tmp is used to determine the instruction + * to be executed after base.pc_next - e.g. next sequential instruction + * or a branch target. + */ + uint64_t pc_tmp; + uint32_t ilen; + enum cc_op cc_op; + bool do_debug; +}; + +/* Information carried about a condition to be evaluated. */ +typedef struct { + TCGCond cond:8; + bool is_64; + bool g1; + bool g2; + union { + struct { TCGv_i64 a, b; } s64; + struct { TCGv_i32 a, b; } s32; + } u; +} DisasCompare; + +#ifdef DEBUG_INLINE_BRANCHES +static uint64_t inline_branch_hit[CC_OP_MAX]; +static uint64_t inline_branch_miss[CC_OP_MAX]; +#endif + +static void pc_to_link_info(TCGv_i64 out, DisasContext *s, uint64_t pc) +{ + TCGv_i64 tmp; + + if (s->base.tb->flags & FLAG_MASK_32) { + if (s->base.tb->flags & FLAG_MASK_64) { + tcg_gen_movi_i64(out, pc); + return; + } + pc |= 0x80000000; + } + assert(!(s->base.tb->flags & FLAG_MASK_64)); + tmp = tcg_const_i64(pc); + tcg_gen_deposit_i64(out, out, tmp, 0, 32); + tcg_temp_free_i64(tmp); +} + +static TCGv_i64 psw_addr; +static TCGv_i64 psw_mask; +static TCGv_i64 gbea; + +static TCGv_i32 cc_op; +static TCGv_i64 cc_src; +static TCGv_i64 cc_dst; +static TCGv_i64 cc_vr; + +static char cpu_reg_names[16][4]; +static TCGv_i64 regs[16]; + +void s390x_translate_init(void) +{ + int i; + + psw_addr = tcg_global_mem_new_i64(cpu_env, + offsetof(CPUS390XState, psw.addr), + "psw_addr"); + psw_mask = tcg_global_mem_new_i64(cpu_env, + offsetof(CPUS390XState, psw.mask), + "psw_mask"); + gbea = tcg_global_mem_new_i64(cpu_env, + offsetof(CPUS390XState, gbea), + "gbea"); + + cc_op = tcg_global_mem_new_i32(cpu_env, offsetof(CPUS390XState, cc_op), + "cc_op"); + cc_src = tcg_global_mem_new_i64(cpu_env, offsetof(CPUS390XState, cc_src), + "cc_src"); + cc_dst = tcg_global_mem_new_i64(cpu_env, offsetof(CPUS390XState, cc_dst), + "cc_dst"); + cc_vr = tcg_global_mem_new_i64(cpu_env, offsetof(CPUS390XState, cc_vr), + "cc_vr"); + + for (i = 0; i < 16; i++) { + snprintf(cpu_reg_names[i], sizeof(cpu_reg_names[0]), "r%d", i); + regs[i] = tcg_global_mem_new(cpu_env, + offsetof(CPUS390XState, regs[i]), + cpu_reg_names[i]); + } +} + +static inline int vec_full_reg_offset(uint8_t reg) +{ + g_assert(reg < 32); + return offsetof(CPUS390XState, vregs[reg][0]); +} + +static inline int vec_reg_offset(uint8_t reg, uint8_t enr, MemOp es) +{ + /* Convert element size (es) - e.g. 
MO_8 - to bytes */
+    const uint8_t bytes = 1 << es;
+    int offs = enr * bytes;
+
+    /*
+     * vregs[n][0] is the lowest 8 bytes and vregs[n][1] the highest 8 bytes
+     * of the 16 byte vector, on both little and big endian systems.
+     *
+     * Big Endian (target/possible host)
+     * B:  [ 0][ 1][ 2][ 3][ 4][ 5][ 6][ 7] - [ 8][ 9][10][11][12][13][14][15]
+     * HW: [     0][     1][     2][     3] - [     4][     5][     6][     7]
+     * W:  [             0][             1] - [             2][             3]
+     * DW: [                             0] - [                             1]
+     *
+     * Little Endian (possible host)
+     * B:  [ 7][ 6][ 5][ 4][ 3][ 2][ 1][ 0] - [15][14][13][12][11][10][ 9][ 8]
+     * HW: [     3][     2][     1][     0] - [     7][     6][     5][     4]
+     * W:  [             1][             0] - [             3][             2]
+     * DW: [                             0] - [                             1]
+     *
+     * For 16 byte elements, the two 8 byte halves will not form a host
+     * int128 if the host is little endian, since they're in the wrong order.
+     * Some operations (e.g. xor) do not care. For operations like addition,
+     * the two 8 byte elements have to be loaded separately. Let's force all
+     * 16 byte operations to handle it in a special way.
+     */
+    g_assert(es <= MO_64);
+#ifndef HOST_WORDS_BIGENDIAN
+    offs ^= (8 - bytes);
+#endif
+    return offs + vec_full_reg_offset(reg);
+}
+
+static inline int freg64_offset(uint8_t reg)
+{
+    g_assert(reg < 16);
+    return vec_reg_offset(reg, 0, MO_64);
+}
+
+static inline int freg32_offset(uint8_t reg)
+{
+    g_assert(reg < 16);
+    return vec_reg_offset(reg, 0, MO_32);
+}
+
+static TCGv_i64 load_reg(int reg)
+{
+    TCGv_i64 r = tcg_temp_new_i64();
+    tcg_gen_mov_i64(r, regs[reg]);
+    return r;
+}
+
+static TCGv_i64 load_freg(int reg)
+{
+    TCGv_i64 r = tcg_temp_new_i64();
+
+    tcg_gen_ld_i64(r, cpu_env, freg64_offset(reg));
+    return r;
+}
+
+static TCGv_i64 load_freg32_i64(int reg)
+{
+    TCGv_i64 r = tcg_temp_new_i64();
+
+    tcg_gen_ld32u_i64(r, cpu_env, freg32_offset(reg));
+    return r;
+}
+
+static void store_reg(int reg, TCGv_i64 v)
+{
+    tcg_gen_mov_i64(regs[reg], v);
+}
+
+static void store_freg(int reg, TCGv_i64 v)
+{
+    tcg_gen_st_i64(v, cpu_env, freg64_offset(reg));
+}
+
+static void store_reg32_i64(int reg, TCGv_i64 v)
+{
+    /* 32 bit register writes keep the upper half */
+    tcg_gen_deposit_i64(regs[reg], regs[reg], v, 0, 32);
+}
+
+static void store_reg32h_i64(int reg, TCGv_i64 v)
+{
+    tcg_gen_deposit_i64(regs[reg], regs[reg], v, 32, 32);
+}
+
+static void store_freg32_i64(int reg, TCGv_i64 v)
+{
+    tcg_gen_st32_i64(v, cpu_env, freg32_offset(reg));
+}
+
+static void return_low128(TCGv_i64 dest)
+{
+    tcg_gen_ld_i64(dest, cpu_env, offsetof(CPUS390XState, retxl));
+}
+
+static void update_psw_addr(DisasContext *s)
+{
+    /* psw.addr */
+    tcg_gen_movi_i64(psw_addr, s->base.pc_next);
+}
+
+static void per_branch(DisasContext *s, bool to_next)
+{
+#ifndef CONFIG_USER_ONLY
+    tcg_gen_movi_i64(gbea, s->base.pc_next);
+
+    if (s->base.tb->flags & FLAG_MASK_PER) {
+        TCGv_i64 next_pc = to_next ?
tcg_const_i64(s->pc_tmp) : psw_addr;
+        gen_helper_per_branch(cpu_env, gbea, next_pc);
+        if (to_next) {
+            tcg_temp_free_i64(next_pc);
+        }
+    }
+#endif
+}
+
+static void per_branch_cond(DisasContext *s, TCGCond cond,
+                            TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#ifndef CONFIG_USER_ONLY
+    if (s->base.tb->flags & FLAG_MASK_PER) {
+        TCGLabel *lab = gen_new_label();
+        tcg_gen_brcond_i64(tcg_invert_cond(cond), arg1, arg2, lab);
+
+        tcg_gen_movi_i64(gbea, s->base.pc_next);
+        gen_helper_per_branch(cpu_env, gbea, psw_addr);
+
+        gen_set_label(lab);
+    } else {
+        TCGv_i64 pc = tcg_const_i64(s->base.pc_next);
+        tcg_gen_movcond_i64(cond, gbea, arg1, arg2, gbea, pc);
+        tcg_temp_free_i64(pc);
+    }
+#endif
+}
+
+static void per_breaking_event(DisasContext *s)
+{
+    tcg_gen_movi_i64(gbea, s->base.pc_next);
+}
+
+static void update_cc_op(DisasContext *s)
+{
+    if (s->cc_op != CC_OP_DYNAMIC && s->cc_op != CC_OP_STATIC) {
+        tcg_gen_movi_i32(cc_op, s->cc_op);
+    }
+}
+
+static inline uint64_t ld_code2(CPUS390XState *env, uint64_t pc)
+{
+    return (uint64_t)cpu_lduw_code(env, pc);
+}
+
+static inline uint64_t ld_code4(CPUS390XState *env, uint64_t pc)
+{
+    return (uint64_t)(uint32_t)cpu_ldl_code(env, pc);
+}
+
+static int get_mem_index(DisasContext *s)
+{
+#ifdef CONFIG_USER_ONLY
+    return MMU_USER_IDX;
+#else
+    if (!(s->base.tb->flags & FLAG_MASK_DAT)) {
+        return MMU_REAL_IDX;
+    }
+
+    switch (s->base.tb->flags & FLAG_MASK_ASC) {
+    case PSW_ASC_PRIMARY >> FLAG_MASK_PSW_SHIFT:
+        return MMU_PRIMARY_IDX;
+    case PSW_ASC_SECONDARY >> FLAG_MASK_PSW_SHIFT:
+        return MMU_SECONDARY_IDX;
+    case PSW_ASC_HOME >> FLAG_MASK_PSW_SHIFT:
+        return MMU_HOME_IDX;
+    default:
+        tcg_abort();
+        break;
+    }
+#endif
+}
+
+static void gen_exception(int excp)
+{
+    TCGv_i32 tmp = tcg_const_i32(excp);
+    gen_helper_exception(cpu_env, tmp);
+    tcg_temp_free_i32(tmp);
+}
+
+static void gen_program_exception(DisasContext *s, int code)
+{
+    TCGv_i32 tmp;
+
+    /* Remember what pgm exception this was. */
+    tmp = tcg_const_i32(code);
+    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_code));
+    tcg_temp_free_i32(tmp);
+
+    tmp = tcg_const_i32(s->ilen);
+    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_ilen));
+    tcg_temp_free_i32(tmp);
+
+    /* update the psw */
+    update_psw_addr(s);
+
+    /* Save off cc. */
+    update_cc_op(s);
+
+    /* Trigger exception. */
+    gen_exception(EXCP_PGM);
+}
+
+static inline void gen_illegal_opcode(DisasContext *s)
+{
+    gen_program_exception(s, PGM_OPERATION);
+}
+
+static inline void gen_data_exception(uint8_t dxc)
+{
+    TCGv_i32 tmp = tcg_const_i32(dxc);
+    gen_helper_data_exception(cpu_env, tmp);
+    tcg_temp_free_i32(tmp);
+}
+
+static inline void gen_trap(DisasContext *s)
+{
+    /* Set DXC to 0xff */
+    gen_data_exception(0xff);
+}
+
+static void gen_addi_and_wrap_i64(DisasContext *s, TCGv_i64 dst, TCGv_i64 src,
+                                  int64_t imm)
+{
+    tcg_gen_addi_i64(dst, src, imm);
+    if (!(s->base.tb->flags & FLAG_MASK_64)) {
+        if (s->base.tb->flags & FLAG_MASK_32) {
+            tcg_gen_andi_i64(dst, dst, 0x7fffffff);
+        } else {
+            tcg_gen_andi_i64(dst, dst, 0x00ffffff);
+        }
+    }
+}
+
+static TCGv_i64 get_address(DisasContext *s, int x2, int b2, int d2)
+{
+    TCGv_i64 tmp = tcg_temp_new_i64();
+
+    /*
+     * Note that d2 is limited to 20 bits, signed. If we crop negative
+     * displacements early we create larger immediate addends.
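+     * For example, with 24-bit addressing, base + (-4) is computed as a
+     * signed addition and wrapped once at the end, rather than adding the
+     * masked 24-bit constant 0xfffffc up front.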
+ */ + if (b2 && x2) { + tcg_gen_add_i64(tmp, regs[b2], regs[x2]); + gen_addi_and_wrap_i64(s, tmp, tmp, d2); + } else if (b2) { + gen_addi_and_wrap_i64(s, tmp, regs[b2], d2); + } else if (x2) { + gen_addi_and_wrap_i64(s, tmp, regs[x2], d2); + } else if (!(s->base.tb->flags & FLAG_MASK_64)) { + if (s->base.tb->flags & FLAG_MASK_32) { + tcg_gen_movi_i64(tmp, d2 & 0x7fffffff); + } else { + tcg_gen_movi_i64(tmp, d2 & 0x00ffffff); + } + } else { + tcg_gen_movi_i64(tmp, d2); + } + + return tmp; +} + +static inline bool live_cc_data(DisasContext *s) +{ + return (s->cc_op != CC_OP_DYNAMIC + && s->cc_op != CC_OP_STATIC + && s->cc_op > 3); +} + +static inline void gen_op_movi_cc(DisasContext *s, uint32_t val) +{ + if (live_cc_data(s)) { + tcg_gen_discard_i64(cc_src); + tcg_gen_discard_i64(cc_dst); + tcg_gen_discard_i64(cc_vr); + } + s->cc_op = CC_OP_CONST0 + val; +} + +static void gen_op_update1_cc_i64(DisasContext *s, enum cc_op op, TCGv_i64 dst) +{ + if (live_cc_data(s)) { + tcg_gen_discard_i64(cc_src); + tcg_gen_discard_i64(cc_vr); + } + tcg_gen_mov_i64(cc_dst, dst); + s->cc_op = op; +} + +static void gen_op_update2_cc_i64(DisasContext *s, enum cc_op op, TCGv_i64 src, + TCGv_i64 dst) +{ + if (live_cc_data(s)) { + tcg_gen_discard_i64(cc_vr); + } + tcg_gen_mov_i64(cc_src, src); + tcg_gen_mov_i64(cc_dst, dst); + s->cc_op = op; +} + +static void gen_op_update3_cc_i64(DisasContext *s, enum cc_op op, TCGv_i64 src, + TCGv_i64 dst, TCGv_i64 vr) +{ + tcg_gen_mov_i64(cc_src, src); + tcg_gen_mov_i64(cc_dst, dst); + tcg_gen_mov_i64(cc_vr, vr); + s->cc_op = op; +} + +static void set_cc_nz_u64(DisasContext *s, TCGv_i64 val) +{ + gen_op_update1_cc_i64(s, CC_OP_NZ, val); +} + +/* CC value is in env->cc_op */ +static void set_cc_static(DisasContext *s) +{ + if (live_cc_data(s)) { + tcg_gen_discard_i64(cc_src); + tcg_gen_discard_i64(cc_dst); + tcg_gen_discard_i64(cc_vr); + } + s->cc_op = CC_OP_STATIC; +} + +/* calculates cc into cc_op */ +static void gen_op_calc_cc(DisasContext *s) +{ + TCGv_i32 local_cc_op = NULL; + TCGv_i64 dummy = NULL; + + switch (s->cc_op) { + default: + dummy = tcg_const_i64(0); + /* FALLTHRU */ + case CC_OP_ADD_64: + case CC_OP_SUB_64: + case CC_OP_ADD_32: + case CC_OP_SUB_32: + local_cc_op = tcg_const_i32(s->cc_op); + break; + case CC_OP_CONST0: + case CC_OP_CONST1: + case CC_OP_CONST2: + case CC_OP_CONST3: + case CC_OP_STATIC: + case CC_OP_DYNAMIC: + break; + } + + switch (s->cc_op) { + case CC_OP_CONST0: + case CC_OP_CONST1: + case CC_OP_CONST2: + case CC_OP_CONST3: + /* s->cc_op is the cc value */ + tcg_gen_movi_i32(cc_op, s->cc_op - CC_OP_CONST0); + break; + case CC_OP_STATIC: + /* env->cc_op already is the cc value */ + break; + case CC_OP_NZ: + case CC_OP_ABS_64: + case CC_OP_NABS_64: + case CC_OP_ABS_32: + case CC_OP_NABS_32: + case CC_OP_LTGT0_32: + case CC_OP_LTGT0_64: + case CC_OP_COMP_32: + case CC_OP_COMP_64: + case CC_OP_NZ_F32: + case CC_OP_NZ_F64: + case CC_OP_FLOGR: + case CC_OP_LCBB: + case CC_OP_MULS_32: + /* 1 argument */ + gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, dummy, cc_dst, dummy); + break; + case CC_OP_ADDU: + case CC_OP_ICM: + case CC_OP_LTGT_32: + case CC_OP_LTGT_64: + case CC_OP_LTUGTU_32: + case CC_OP_LTUGTU_64: + case CC_OP_TM_32: + case CC_OP_TM_64: + case CC_OP_SLA_32: + case CC_OP_SLA_64: + case CC_OP_SUBU: + case CC_OP_NZ_F128: + case CC_OP_VC: + case CC_OP_MULS_64: + /* 2 arguments */ + gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, dummy); + break; + case CC_OP_ADD_64: + case CC_OP_SUB_64: + case CC_OP_ADD_32: + case CC_OP_SUB_32: 
+ /* 3 arguments */ + gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, cc_vr); + break; + case CC_OP_DYNAMIC: + /* unknown operation - assume 3 arguments and cc_op in env */ + gen_helper_calc_cc(cc_op, cpu_env, cc_op, cc_src, cc_dst, cc_vr); + break; + default: + tcg_abort(); + } + + if (local_cc_op) { + tcg_temp_free_i32(local_cc_op); + } + if (dummy) { + tcg_temp_free_i64(dummy); + } + + /* We now have cc in cc_op as constant */ + set_cc_static(s); +} + +static bool use_goto_tb(DisasContext *s, uint64_t dest) +{ + if (unlikely(s->base.tb->flags & FLAG_MASK_PER)) { + return false; + } + return translator_use_goto_tb(&s->base, dest); +} + +static void account_noninline_branch(DisasContext *s, int cc_op) +{ +#ifdef DEBUG_INLINE_BRANCHES + inline_branch_miss[cc_op]++; +#endif +} + +static void account_inline_branch(DisasContext *s, int cc_op) +{ +#ifdef DEBUG_INLINE_BRANCHES + inline_branch_hit[cc_op]++; +#endif +} + +/* Table of mask values to comparison codes, given a comparison as input. + For such, CC=3 should not be possible. */ +static const TCGCond ltgt_cond[16] = { + TCG_COND_NEVER, TCG_COND_NEVER, /* | | | x */ + TCG_COND_GT, TCG_COND_GT, /* | | GT | x */ + TCG_COND_LT, TCG_COND_LT, /* | LT | | x */ + TCG_COND_NE, TCG_COND_NE, /* | LT | GT | x */ + TCG_COND_EQ, TCG_COND_EQ, /* EQ | | | x */ + TCG_COND_GE, TCG_COND_GE, /* EQ | | GT | x */ + TCG_COND_LE, TCG_COND_LE, /* EQ | LT | | x */ + TCG_COND_ALWAYS, TCG_COND_ALWAYS, /* EQ | LT | GT | x */ +}; + +/* Table of mask values to comparison codes, given a logic op as input. + For such, only CC=0 and CC=1 should be possible. */ +static const TCGCond nz_cond[16] = { + TCG_COND_NEVER, TCG_COND_NEVER, /* | | x | x */ + TCG_COND_NEVER, TCG_COND_NEVER, + TCG_COND_NE, TCG_COND_NE, /* | NE | x | x */ + TCG_COND_NE, TCG_COND_NE, + TCG_COND_EQ, TCG_COND_EQ, /* EQ | | x | x */ + TCG_COND_EQ, TCG_COND_EQ, + TCG_COND_ALWAYS, TCG_COND_ALWAYS, /* EQ | NE | x | x */ + TCG_COND_ALWAYS, TCG_COND_ALWAYS, +}; + +/* Interpret MASK in terms of S->CC_OP, and fill in C with all the + details required to generate a TCG comparison. */ +static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask) +{ + TCGCond cond; + enum cc_op old_cc_op = s->cc_op; + + if (mask == 15 || mask == 0) { + c->cond = (mask ? TCG_COND_ALWAYS : TCG_COND_NEVER); + c->u.s32.a = cc_op; + c->u.s32.b = cc_op; + c->g1 = c->g2 = true; + c->is_64 = false; + return; + } + + /* Find the TCG condition for the mask + cc op. 
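+       Mask bit 8 selects CC 0, bit 4 selects CC 1, bit 2 selects CC 2 and
+       bit 1 selects CC 3; the branch is taken when (8 >> cc) & mask is
+       nonzero.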
*/ + switch (old_cc_op) { + case CC_OP_LTGT0_32: + case CC_OP_LTGT0_64: + case CC_OP_LTGT_32: + case CC_OP_LTGT_64: + cond = ltgt_cond[mask]; + if (cond == TCG_COND_NEVER) { + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + case CC_OP_LTUGTU_32: + case CC_OP_LTUGTU_64: + cond = tcg_unsigned_cond(ltgt_cond[mask]); + if (cond == TCG_COND_NEVER) { + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + case CC_OP_NZ: + cond = nz_cond[mask]; + if (cond == TCG_COND_NEVER) { + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + case CC_OP_TM_32: + case CC_OP_TM_64: + switch (mask) { + case 8: + cond = TCG_COND_EQ; + break; + case 4 | 2 | 1: + cond = TCG_COND_NE; + break; + default: + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + case CC_OP_ICM: + switch (mask) { + case 8: + cond = TCG_COND_EQ; + break; + case 4 | 2 | 1: + case 4 | 2: + cond = TCG_COND_NE; + break; + default: + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + case CC_OP_FLOGR: + switch (mask & 0xa) { + case 8: /* src == 0 -> no one bit found */ + cond = TCG_COND_EQ; + break; + case 2: /* src != 0 -> one bit found */ + cond = TCG_COND_NE; + break; + default: + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + case CC_OP_ADDU: + case CC_OP_SUBU: + switch (mask) { + case 8 | 2: /* result == 0 */ + cond = TCG_COND_EQ; + break; + case 4 | 1: /* result != 0 */ + cond = TCG_COND_NE; + break; + case 8 | 4: /* !carry (borrow) */ + cond = old_cc_op == CC_OP_ADDU ? TCG_COND_EQ : TCG_COND_NE; + break; + case 2 | 1: /* carry (!borrow) */ + cond = old_cc_op == CC_OP_ADDU ? TCG_COND_NE : TCG_COND_EQ; + break; + default: + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + default: + do_dynamic: + /* Calculate cc value. */ + gen_op_calc_cc(s); + /* FALLTHRU */ + + case CC_OP_STATIC: + /* Jump based on CC. We'll load up the real cond below; + the assignment here merely avoids a compiler warning. */ + account_noninline_branch(s, old_cc_op); + old_cc_op = CC_OP_STATIC; + cond = TCG_COND_NEVER; + break; + } + + /* Load up the arguments of the comparison. 
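+       The g1/g2 flags mark TCG globals (cc_op, cc_src, cc_dst) so that
+       free_compare() will not free them.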
*/ + c->is_64 = true; + c->g1 = c->g2 = false; + switch (old_cc_op) { + case CC_OP_LTGT0_32: + c->is_64 = false; + c->u.s32.a = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(c->u.s32.a, cc_dst); + c->u.s32.b = tcg_const_i32(0); + break; + case CC_OP_LTGT_32: + case CC_OP_LTUGTU_32: + c->is_64 = false; + c->u.s32.a = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(c->u.s32.a, cc_src); + c->u.s32.b = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(c->u.s32.b, cc_dst); + break; + + case CC_OP_LTGT0_64: + case CC_OP_NZ: + case CC_OP_FLOGR: + c->u.s64.a = cc_dst; + c->u.s64.b = tcg_const_i64(0); + c->g1 = true; + break; + case CC_OP_LTGT_64: + case CC_OP_LTUGTU_64: + c->u.s64.a = cc_src; + c->u.s64.b = cc_dst; + c->g1 = c->g2 = true; + break; + + case CC_OP_TM_32: + case CC_OP_TM_64: + case CC_OP_ICM: + c->u.s64.a = tcg_temp_new_i64(); + c->u.s64.b = tcg_const_i64(0); + tcg_gen_and_i64(c->u.s64.a, cc_src, cc_dst); + break; + + case CC_OP_ADDU: + case CC_OP_SUBU: + c->is_64 = true; + c->u.s64.b = tcg_const_i64(0); + c->g1 = true; + switch (mask) { + case 8 | 2: + case 4 | 1: /* result */ + c->u.s64.a = cc_dst; + break; + case 8 | 4: + case 2 | 1: /* carry */ + c->u.s64.a = cc_src; + break; + default: + g_assert_not_reached(); + } + break; + + case CC_OP_STATIC: + c->is_64 = false; + c->u.s32.a = cc_op; + c->g1 = true; + switch (mask) { + case 0x8 | 0x4 | 0x2: /* cc != 3 */ + cond = TCG_COND_NE; + c->u.s32.b = tcg_const_i32(3); + break; + case 0x8 | 0x4 | 0x1: /* cc != 2 */ + cond = TCG_COND_NE; + c->u.s32.b = tcg_const_i32(2); + break; + case 0x8 | 0x2 | 0x1: /* cc != 1 */ + cond = TCG_COND_NE; + c->u.s32.b = tcg_const_i32(1); + break; + case 0x8 | 0x2: /* cc == 0 || cc == 2 => (cc & 1) == 0 */ + cond = TCG_COND_EQ; + c->g1 = false; + c->u.s32.a = tcg_temp_new_i32(); + c->u.s32.b = tcg_const_i32(0); + tcg_gen_andi_i32(c->u.s32.a, cc_op, 1); + break; + case 0x8 | 0x4: /* cc < 2 */ + cond = TCG_COND_LTU; + c->u.s32.b = tcg_const_i32(2); + break; + case 0x8: /* cc == 0 */ + cond = TCG_COND_EQ; + c->u.s32.b = tcg_const_i32(0); + break; + case 0x4 | 0x2 | 0x1: /* cc != 0 */ + cond = TCG_COND_NE; + c->u.s32.b = tcg_const_i32(0); + break; + case 0x4 | 0x1: /* cc == 1 || cc == 3 => (cc & 1) != 0 */ + cond = TCG_COND_NE; + c->g1 = false; + c->u.s32.a = tcg_temp_new_i32(); + c->u.s32.b = tcg_const_i32(0); + tcg_gen_andi_i32(c->u.s32.a, cc_op, 1); + break; + case 0x4: /* cc == 1 */ + cond = TCG_COND_EQ; + c->u.s32.b = tcg_const_i32(1); + break; + case 0x2 | 0x1: /* cc > 1 */ + cond = TCG_COND_GTU; + c->u.s32.b = tcg_const_i32(1); + break; + case 0x2: /* cc == 2 */ + cond = TCG_COND_EQ; + c->u.s32.b = tcg_const_i32(2); + break; + case 0x1: /* cc == 3 */ + cond = TCG_COND_EQ; + c->u.s32.b = tcg_const_i32(3); + break; + default: + /* CC is masked by something else: (8 >> cc) & mask. */ + cond = TCG_COND_NE; + c->g1 = false; + c->u.s32.a = tcg_const_i32(8); + c->u.s32.b = tcg_const_i32(0); + tcg_gen_shr_i32(c->u.s32.a, c->u.s32.a, cc_op); + tcg_gen_andi_i32(c->u.s32.a, c->u.s32.a, mask); + break; + } + break; + + default: + abort(); + } + c->cond = cond; +} + +static void free_compare(DisasCompare *c) +{ + if (!c->g1) { + if (c->is_64) { + tcg_temp_free_i64(c->u.s64.a); + } else { + tcg_temp_free_i32(c->u.s32.a); + } + } + if (!c->g2) { + if (c->is_64) { + tcg_temp_free_i64(c->u.s64.b); + } else { + tcg_temp_free_i32(c->u.s32.b); + } + } +} + +/* ====================================================================== */ +/* Define the insn format enumeration. 
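+   Each F<n>(name, ...) line in insn-format.def expands to one FMT_<name>
+   enumerator here.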
 */
+#define F0(N) FMT_##N,
+#define F1(N, X1) F0(N)
+#define F2(N, X1, X2) F0(N)
+#define F3(N, X1, X2, X3) F0(N)
+#define F4(N, X1, X2, X3, X4) F0(N)
+#define F5(N, X1, X2, X3, X4, X5) F0(N)
+#define F6(N, X1, X2, X3, X4, X5, X6) F0(N)
+
+typedef enum {
+#include "insn-format.def"
+} DisasFormat;
+
+#undef F0
+#undef F1
+#undef F2
+#undef F3
+#undef F4
+#undef F5
+#undef F6
+
+/* This is the way fields are to be accessed out of DisasFields. */
+#define have_field(S, F)  have_field1((S), FLD_O_##F)
+#define get_field(S, F)   get_field1((S), FLD_O_##F, FLD_C_##F)
+
+static bool have_field1(const DisasContext *s, enum DisasFieldIndexO c)
+{
+    return (s->fields.presentO >> c) & 1;
+}
+
+static int get_field1(const DisasContext *s, enum DisasFieldIndexO o,
+                      enum DisasFieldIndexC c)
+{
+    assert(have_field1(s, o));
+    return s->fields.c[c];
+}
+
+/* Describe the layout of each field in each format. */
+typedef struct DisasField {
+    unsigned int beg:8;
+    unsigned int size:8;
+    unsigned int type:2;
+    unsigned int indexC:6;
+    enum DisasFieldIndexO indexO:8;
+} DisasField;
+
+typedef struct DisasFormatInfo {
+    DisasField op[NUM_C_FIELD];
+} DisasFormatInfo;
+
+#define R(N, B) { B, 4, 0, FLD_C_r##N, FLD_O_r##N }
+#define M(N, B) { B, 4, 0, FLD_C_m##N, FLD_O_m##N }
+#define V(N, B) { B, 4, 3, FLD_C_v##N, FLD_O_v##N }
+#define BD(N, BB, BD) { BB, 4, 0, FLD_C_b##N, FLD_O_b##N }, \
+                      { BD, 12, 0, FLD_C_d##N, FLD_O_d##N }
+#define BXD(N) { 16, 4, 0, FLD_C_b##N, FLD_O_b##N }, \
+               { 12, 4, 0, FLD_C_x##N, FLD_O_x##N }, \
+               { 20, 12, 0, FLD_C_d##N, FLD_O_d##N }
+#define BDL(N) { 16, 4, 0, FLD_C_b##N, FLD_O_b##N }, \
+               { 20, 20, 2, FLD_C_d##N, FLD_O_d##N }
+#define BXDL(N) { 16, 4, 0, FLD_C_b##N, FLD_O_b##N }, \
+                { 12, 4, 0, FLD_C_x##N, FLD_O_x##N }, \
+                { 20, 20, 2, FLD_C_d##N, FLD_O_d##N }
+#define I(N, B, S) { B, S, 1, FLD_C_i##N, FLD_O_i##N }
+#define L(N, B, S) { B, S, 0, FLD_C_l##N, FLD_O_l##N }
+
+#define F0(N) { { } },
+#define F1(N, X1) { { X1 } },
+#define F2(N, X1, X2) { { X1, X2 } },
+#define F3(N, X1, X2, X3) { { X1, X2, X3 } },
+#define F4(N, X1, X2, X3, X4) { { X1, X2, X3, X4 } },
+#define F5(N, X1, X2, X3, X4, X5) { { X1, X2, X3, X4, X5 } },
+#define F6(N, X1, X2, X3, X4, X5, X6) { { X1, X2, X3, X4, X5, X6 } },
+
+static const DisasFormatInfo format_info[] = {
+#include "insn-format.def"
+};
+
+#undef F0
+#undef F1
+#undef F2
+#undef F3
+#undef F4
+#undef F5
+#undef F6
+#undef R
+#undef M
+#undef V
+#undef BD
+#undef BXD
+#undef BDL
+#undef BXDL
+#undef I
+#undef L
+
+/* Generally, we'll extract operands into these structures, operate upon
+   them, and store them back. See the "in1", "in2", "prep", "wout" sets
+   of routines below for more details. */
+typedef struct {
+    bool g_out, g_out2, g_in1, g_in2;
+    TCGv_i64 out, out2, in1, in2;
+    TCGv_i64 addr1;
+} DisasOps;
+
+/* Instructions can place constraints on their operands, raising specification
+   exceptions if they are violated. To make this easy to automate, each "in1",
+   "in2", "prep", "wout" helper will have a SPEC_<name> define that equals one
+   of the following, or 0. To make this easy to document, we'll put the
+   SPEC_<name> defines next to <name>. */
+
+#define SPEC_r1_even 1
+#define SPEC_r2_even 2
+#define SPEC_r3_even 4
+#define SPEC_r1_f128 8
+#define SPEC_r2_f128 16
+
+/* Return values from translate_one, indicating the state of the TB. */
+
+/* We are not using a goto_tb (for whatever reason), but have updated
+   the PC (for whatever reason), so there's no need to do it again on
+   exiting the TB.
*/ +#define DISAS_PC_UPDATED DISAS_TARGET_0 + +/* We have emitted one or more goto_tb. No fixup required. */ +#define DISAS_GOTO_TB DISAS_TARGET_1 + +/* We have updated the PC and CC values. */ +#define DISAS_PC_CC_UPDATED DISAS_TARGET_2 + +/* We are exiting the TB, but have neither emitted a goto_tb, nor + updated the PC for the next instruction to be executed. */ +#define DISAS_PC_STALE DISAS_TARGET_3 + +/* We are exiting the TB to the main loop. */ +#define DISAS_PC_STALE_NOCHAIN DISAS_TARGET_4 + + +/* Instruction flags */ +#define IF_AFP1 0x0001 /* r1 is a fp reg for HFP/FPS instructions */ +#define IF_AFP2 0x0002 /* r2 is a fp reg for HFP/FPS instructions */ +#define IF_AFP3 0x0004 /* r3 is a fp reg for HFP/FPS instructions */ +#define IF_BFP 0x0008 /* binary floating point instruction */ +#define IF_DFP 0x0010 /* decimal floating point instruction */ +#define IF_PRIV 0x0020 /* privileged instruction */ +#define IF_VEC 0x0040 /* vector instruction */ +#define IF_IO 0x0080 /* input/output instruction */ + +struct DisasInsn { + unsigned opc:16; + unsigned flags:16; + DisasFormat fmt:8; + unsigned fac:8; + unsigned spec:8; + + const char *name; + + /* Pre-process arguments before HELP_OP. */ + void (*help_in1)(DisasContext *, DisasOps *); + void (*help_in2)(DisasContext *, DisasOps *); + void (*help_prep)(DisasContext *, DisasOps *); + + /* + * Post-process output after HELP_OP. + * Note that these are not called if HELP_OP returns DISAS_NORETURN. + */ + void (*help_wout)(DisasContext *, DisasOps *); + void (*help_cout)(DisasContext *, DisasOps *); + + /* Implement the operation itself. */ + DisasJumpType (*help_op)(DisasContext *, DisasOps *); + + uint64_t data; +}; + +/* ====================================================================== */ +/* Miscellaneous helpers, used by several operations. */ + +static void help_l2_shift(DisasContext *s, DisasOps *o, int mask) +{ + int b2 = get_field(s, b2); + int d2 = get_field(s, d2); + + if (b2 == 0) { + o->in2 = tcg_const_i64(d2 & mask); + } else { + o->in2 = get_address(s, 0, b2, d2); + tcg_gen_andi_i64(o->in2, o->in2, mask); + } +} + +static DisasJumpType help_goto_direct(DisasContext *s, uint64_t dest) +{ + if (dest == s->pc_tmp) { + per_branch(s, true); + return DISAS_NEXT; + } + if (use_goto_tb(s, dest)) { + update_cc_op(s); + per_breaking_event(s); + tcg_gen_goto_tb(0); + tcg_gen_movi_i64(psw_addr, dest); + tcg_gen_exit_tb(s->base.tb, 0); + return DISAS_GOTO_TB; + } else { + tcg_gen_movi_i64(psw_addr, dest); + per_branch(s, false); + return DISAS_PC_UPDATED; + } +} + +static DisasJumpType help_branch(DisasContext *s, DisasCompare *c, + bool is_imm, int imm, TCGv_i64 cdest) +{ + DisasJumpType ret; + uint64_t dest = s->base.pc_next + 2 * imm; + TCGLabel *lab; + + /* Take care of the special cases first. */ + if (c->cond == TCG_COND_NEVER) { + ret = DISAS_NEXT; + goto egress; + } + if (is_imm) { + if (dest == s->pc_tmp) { + /* Branch to next. */ + per_branch(s, true); + ret = DISAS_NEXT; + goto egress; + } + if (c->cond == TCG_COND_ALWAYS) { + ret = help_goto_direct(s, dest); + goto egress; + } + } else { + if (!cdest) { + /* E.g. bcr %r0 -> no branch. */ + ret = DISAS_NEXT; + goto egress; + } + if (c->cond == TCG_COND_ALWAYS) { + tcg_gen_mov_i64(psw_addr, cdest); + per_branch(s, false); + ret = DISAS_PC_UPDATED; + goto egress; + } + } + + if (use_goto_tb(s, s->pc_tmp)) { + if (is_imm && use_goto_tb(s, dest)) { + /* Both exits can use goto_tb. 
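+               Slot 0 chains the not-taken (fall-through) path and slot 1
+               the taken branch.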
*/ + update_cc_op(s); + + lab = gen_new_label(); + if (c->is_64) { + tcg_gen_brcond_i64(c->cond, c->u.s64.a, c->u.s64.b, lab); + } else { + tcg_gen_brcond_i32(c->cond, c->u.s32.a, c->u.s32.b, lab); + } + + /* Branch not taken. */ + tcg_gen_goto_tb(0); + tcg_gen_movi_i64(psw_addr, s->pc_tmp); + tcg_gen_exit_tb(s->base.tb, 0); + + /* Branch taken. */ + gen_set_label(lab); + per_breaking_event(s); + tcg_gen_goto_tb(1); + tcg_gen_movi_i64(psw_addr, dest); + tcg_gen_exit_tb(s->base.tb, 1); + + ret = DISAS_GOTO_TB; + } else { + /* Fallthru can use goto_tb, but taken branch cannot. */ + /* Store taken branch destination before the brcond. This + avoids having to allocate a new local temp to hold it. + We'll overwrite this in the not taken case anyway. */ + if (!is_imm) { + tcg_gen_mov_i64(psw_addr, cdest); + } + + lab = gen_new_label(); + if (c->is_64) { + tcg_gen_brcond_i64(c->cond, c->u.s64.a, c->u.s64.b, lab); + } else { + tcg_gen_brcond_i32(c->cond, c->u.s32.a, c->u.s32.b, lab); + } + + /* Branch not taken. */ + update_cc_op(s); + tcg_gen_goto_tb(0); + tcg_gen_movi_i64(psw_addr, s->pc_tmp); + tcg_gen_exit_tb(s->base.tb, 0); + + gen_set_label(lab); + if (is_imm) { + tcg_gen_movi_i64(psw_addr, dest); + } + per_breaking_event(s); + ret = DISAS_PC_UPDATED; + } + } else { + /* Fallthru cannot use goto_tb. This by itself is vanishingly rare. + Most commonly we're single-stepping or some other condition that + disables all use of goto_tb. Just update the PC and exit. */ + + TCGv_i64 next = tcg_const_i64(s->pc_tmp); + if (is_imm) { + cdest = tcg_const_i64(dest); + } + + if (c->is_64) { + tcg_gen_movcond_i64(c->cond, psw_addr, c->u.s64.a, c->u.s64.b, + cdest, next); + per_branch_cond(s, c->cond, c->u.s64.a, c->u.s64.b); + } else { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 z = tcg_const_i64(0); + tcg_gen_setcond_i32(c->cond, t0, c->u.s32.a, c->u.s32.b); + tcg_gen_extu_i32_i64(t1, t0); + tcg_temp_free_i32(t0); + tcg_gen_movcond_i64(TCG_COND_NE, psw_addr, t1, z, cdest, next); + per_branch_cond(s, TCG_COND_NE, t1, z); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(z); + } + + if (is_imm) { + tcg_temp_free_i64(cdest); + } + tcg_temp_free_i64(next); + + ret = DISAS_PC_UPDATED; + } + + egress: + free_compare(c); + return ret; +} + +/* ====================================================================== */ +/* The operations. These perform the bulk of the work for any insn, + usually after the operands have been loaded and output initialized. */ + +static DisasJumpType op_abs(DisasContext *s, DisasOps *o) +{ + tcg_gen_abs_i64(o->out, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_absf32(DisasContext *s, DisasOps *o) +{ + tcg_gen_andi_i64(o->out, o->in2, 0x7fffffffull); + return DISAS_NEXT; +} + +static DisasJumpType op_absf64(DisasContext *s, DisasOps *o) +{ + tcg_gen_andi_i64(o->out, o->in2, 0x7fffffffffffffffull); + return DISAS_NEXT; +} + +static DisasJumpType op_absf128(DisasContext *s, DisasOps *o) +{ + tcg_gen_andi_i64(o->out, o->in1, 0x7fffffffffffffffull); + tcg_gen_mov_i64(o->out2, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_add(DisasContext *s, DisasOps *o) +{ + tcg_gen_add_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_addu64(DisasContext *s, DisasOps *o) +{ + tcg_gen_movi_i64(cc_src, 0); + tcg_gen_add2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src); + return DISAS_NEXT; +} + +/* Compute carry into cc_src. 
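+   After a SUBU, cc_src is -1 if a borrow occurred and 0 if not, so the +1
+   below turns it into the usual 0/1 carry-in.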
*/ +static void compute_carry(DisasContext *s) +{ + switch (s->cc_op) { + case CC_OP_ADDU: + /* The carry value is already in cc_src (1,0). */ + break; + case CC_OP_SUBU: + tcg_gen_addi_i64(cc_src, cc_src, 1); + break; + default: + gen_op_calc_cc(s); + /* fall through */ + case CC_OP_STATIC: + /* The carry flag is the msb of CC; compute into cc_src. */ + tcg_gen_extu_i32_i64(cc_src, cc_op); + tcg_gen_shri_i64(cc_src, cc_src, 1); + break; + } +} + +static DisasJumpType op_addc32(DisasContext *s, DisasOps *o) +{ + compute_carry(s); + tcg_gen_add_i64(o->out, o->in1, o->in2); + tcg_gen_add_i64(o->out, o->out, cc_src); + return DISAS_NEXT; +} + +static DisasJumpType op_addc64(DisasContext *s, DisasOps *o) +{ + compute_carry(s); + + TCGv_i64 zero = tcg_const_i64(0); + tcg_gen_add2_i64(o->out, cc_src, o->in1, zero, cc_src, zero); + tcg_gen_add2_i64(o->out, cc_src, o->out, cc_src, o->in2, zero); + tcg_temp_free_i64(zero); + + return DISAS_NEXT; +} + +static DisasJumpType op_asi(DisasContext *s, DisasOps *o) +{ + bool non_atomic = !s390_has_feat(S390_FEAT_STFLE_45); + + o->in1 = tcg_temp_new_i64(); + if (non_atomic) { + tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data); + } else { + /* Perform the atomic addition in memory. */ + tcg_gen_atomic_fetch_add_i64(o->in1, o->addr1, o->in2, get_mem_index(s), + s->insn->data); + } + + /* Recompute also for atomic case: needed for setting CC. */ + tcg_gen_add_i64(o->out, o->in1, o->in2); + + if (non_atomic) { + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data); + } + return DISAS_NEXT; +} + +static DisasJumpType op_asiu64(DisasContext *s, DisasOps *o) +{ + bool non_atomic = !s390_has_feat(S390_FEAT_STFLE_45); + + o->in1 = tcg_temp_new_i64(); + if (non_atomic) { + tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data); + } else { + /* Perform the atomic addition in memory. */ + tcg_gen_atomic_fetch_add_i64(o->in1, o->addr1, o->in2, get_mem_index(s), + s->insn->data); + } + + /* Recompute also for atomic case: needed for setting CC. */ + tcg_gen_movi_i64(cc_src, 0); + tcg_gen_add2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src); + + if (non_atomic) { + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data); + } + return DISAS_NEXT; +} + +static DisasJumpType op_aeb(DisasContext *s, DisasOps *o) +{ + gen_helper_aeb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_adb(DisasContext *s, DisasOps *o) +{ + gen_helper_adb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_axb(DisasContext *s, DisasOps *o) +{ + gen_helper_axb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_and(DisasContext *s, DisasOps *o) +{ + tcg_gen_and_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_andi(DisasContext *s, DisasOps *o) +{ + int shift = s->insn->data & 0xff; + int size = s->insn->data >> 8; + uint64_t mask = ((1ull << size) - 1) << shift; + + assert(!o->g_in2); + tcg_gen_shli_i64(o->in2, o->in2, shift); + tcg_gen_ori_i64(o->in2, o->in2, ~mask); + tcg_gen_and_i64(o->out, o->in1, o->in2); + + /* Produce the CC from only the bits manipulated. 
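+       E.g. for a 16-bit immediate at shift 0, only the low 16 bits of the
+       result feed the CC.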
*/ + tcg_gen_andi_i64(cc_dst, o->out, mask); + set_cc_nz_u64(s, cc_dst); + return DISAS_NEXT; +} + +static DisasJumpType op_ni(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + + if (!s390_has_feat(S390_FEAT_INTERLOCKED_ACCESS_2)) { + tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data); + } else { + /* Perform the atomic operation in memory. */ + tcg_gen_atomic_fetch_and_i64(o->in1, o->addr1, o->in2, get_mem_index(s), + s->insn->data); + } + + /* Recompute also for atomic case: needed for setting CC. */ + tcg_gen_and_i64(o->out, o->in1, o->in2); + + if (!s390_has_feat(S390_FEAT_INTERLOCKED_ACCESS_2)) { + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data); + } + return DISAS_NEXT; +} + +static DisasJumpType op_bas(DisasContext *s, DisasOps *o) +{ + pc_to_link_info(o->out, s, s->pc_tmp); + if (o->in2) { + tcg_gen_mov_i64(psw_addr, o->in2); + per_branch(s, false); + return DISAS_PC_UPDATED; + } else { + return DISAS_NEXT; + } +} + +static void save_link_info(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t; + + if (s->base.tb->flags & (FLAG_MASK_32 | FLAG_MASK_64)) { + pc_to_link_info(o->out, s, s->pc_tmp); + return; + } + gen_op_calc_cc(s); + tcg_gen_andi_i64(o->out, o->out, 0xffffffff00000000ull); + tcg_gen_ori_i64(o->out, o->out, ((s->ilen / 2) << 30) | s->pc_tmp); + t = tcg_temp_new_i64(); + tcg_gen_shri_i64(t, psw_mask, 16); + tcg_gen_andi_i64(t, t, 0x0f000000); + tcg_gen_or_i64(o->out, o->out, t); + tcg_gen_extu_i32_i64(t, cc_op); + tcg_gen_shli_i64(t, t, 28); + tcg_gen_or_i64(o->out, o->out, t); + tcg_temp_free_i64(t); +} + +static DisasJumpType op_bal(DisasContext *s, DisasOps *o) +{ + save_link_info(s, o); + if (o->in2) { + tcg_gen_mov_i64(psw_addr, o->in2); + per_branch(s, false); + return DISAS_PC_UPDATED; + } else { + return DISAS_NEXT; + } +} + +static DisasJumpType op_basi(DisasContext *s, DisasOps *o) +{ + pc_to_link_info(o->out, s, s->pc_tmp); + return help_goto_direct(s, s->base.pc_next + 2 * get_field(s, i2)); +} + +static DisasJumpType op_bc(DisasContext *s, DisasOps *o) +{ + int m1 = get_field(s, m1); + bool is_imm = have_field(s, i2); + int imm = is_imm ? get_field(s, i2) : 0; + DisasCompare c; + + /* BCR with R2 = 0 causes no branching */ + if (have_field(s, r2) && get_field(s, r2) == 0) { + if (m1 == 14) { + /* Perform serialization */ + /* FIXME: check for fast-BCR-serialization facility */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); + } + if (m1 == 15) { + /* Perform serialization */ + /* FIXME: perform checkpoint-synchronisation */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); + } + return DISAS_NEXT; + } + + disas_jcc(s, &c, m1); + return help_branch(s, &c, is_imm, imm, o->in2); +} + +static DisasJumpType op_bct32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + bool is_imm = have_field(s, i2); + int imm = is_imm ? 
get_field(s, i2) : 0; + DisasCompare c; + TCGv_i64 t; + + c.cond = TCG_COND_NE; + c.is_64 = false; + c.g1 = false; + c.g2 = false; + + t = tcg_temp_new_i64(); + tcg_gen_subi_i64(t, regs[r1], 1); + store_reg32_i64(r1, t); + c.u.s32.a = tcg_temp_new_i32(); + c.u.s32.b = tcg_const_i32(0); + tcg_gen_extrl_i64_i32(c.u.s32.a, t); + tcg_temp_free_i64(t); + + return help_branch(s, &c, is_imm, imm, o->in2); +} + +static DisasJumpType op_bcth(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int imm = get_field(s, i2); + DisasCompare c; + TCGv_i64 t; + + c.cond = TCG_COND_NE; + c.is_64 = false; + c.g1 = false; + c.g2 = false; + + t = tcg_temp_new_i64(); + tcg_gen_shri_i64(t, regs[r1], 32); + tcg_gen_subi_i64(t, t, 1); + store_reg32h_i64(r1, t); + c.u.s32.a = tcg_temp_new_i32(); + c.u.s32.b = tcg_const_i32(0); + tcg_gen_extrl_i64_i32(c.u.s32.a, t); + tcg_temp_free_i64(t); + + return help_branch(s, &c, 1, imm, o->in2); +} + +static DisasJumpType op_bct64(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + bool is_imm = have_field(s, i2); + int imm = is_imm ? get_field(s, i2) : 0; + DisasCompare c; + + c.cond = TCG_COND_NE; + c.is_64 = true; + c.g1 = true; + c.g2 = false; + + tcg_gen_subi_i64(regs[r1], regs[r1], 1); + c.u.s64.a = regs[r1]; + c.u.s64.b = tcg_const_i64(0); + + return help_branch(s, &c, is_imm, imm, o->in2); +} + +static DisasJumpType op_bx32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + bool is_imm = have_field(s, i2); + int imm = is_imm ? get_field(s, i2) : 0; + DisasCompare c; + TCGv_i64 t; + + c.cond = (s->insn->data ? TCG_COND_LE : TCG_COND_GT); + c.is_64 = false; + c.g1 = false; + c.g2 = false; + + t = tcg_temp_new_i64(); + tcg_gen_add_i64(t, regs[r1], regs[r3]); + c.u.s32.a = tcg_temp_new_i32(); + c.u.s32.b = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(c.u.s32.a, t); + tcg_gen_extrl_i64_i32(c.u.s32.b, regs[r3 | 1]); + store_reg32_i64(r1, t); + tcg_temp_free_i64(t); + + return help_branch(s, &c, is_imm, imm, o->in2); +} + +static DisasJumpType op_bx64(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + bool is_imm = have_field(s, i2); + int imm = is_imm ? get_field(s, i2) : 0; + DisasCompare c; + + c.cond = (s->insn->data ? 
TCG_COND_LE : TCG_COND_GT); + c.is_64 = true; + + if (r1 == (r3 | 1)) { + c.u.s64.b = load_reg(r3 | 1); + c.g2 = false; + } else { + c.u.s64.b = regs[r3 | 1]; + c.g2 = true; + } + + tcg_gen_add_i64(regs[r1], regs[r1], regs[r3]); + c.u.s64.a = regs[r1]; + c.g1 = true; + + return help_branch(s, &c, is_imm, imm, o->in2); +} + +static DisasJumpType op_cj(DisasContext *s, DisasOps *o) +{ + int imm, m3 = get_field(s, m3); + bool is_imm; + DisasCompare c; + + c.cond = ltgt_cond[m3]; + if (s->insn->data) { + c.cond = tcg_unsigned_cond(c.cond); + } + c.is_64 = c.g1 = c.g2 = true; + c.u.s64.a = o->in1; + c.u.s64.b = o->in2; + + is_imm = have_field(s, i4); + if (is_imm) { + imm = get_field(s, i4); + } else { + imm = 0; + o->out = get_address(s, 0, get_field(s, b4), + get_field(s, d4)); + } + + return help_branch(s, &c, is_imm, imm, o->out); +} + +static DisasJumpType op_ceb(DisasContext *s, DisasOps *o) +{ + gen_helper_ceb(cc_op, cpu_env, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cdb(DisasContext *s, DisasOps *o) +{ + gen_helper_cdb(cc_op, cpu_env, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cxb(DisasContext *s, DisasOps *o) +{ + gen_helper_cxb(cc_op, cpu_env, o->out, o->out2, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static TCGv_i32 fpinst_extract_m34(DisasContext *s, bool m3_with_fpe, + bool m4_with_fpe) +{ + const bool fpe = s390_has_feat(S390_FEAT_FLOATING_POINT_EXT); + uint8_t m3 = get_field(s, m3); + uint8_t m4 = get_field(s, m4); + + /* m3 field was introduced with FPE */ + if (!fpe && m3_with_fpe) { + m3 = 0; + } + /* m4 field was introduced with FPE */ + if (!fpe && m4_with_fpe) { + m4 = 0; + } + + /* Check for valid rounding modes. Mode 3 was introduced later. 
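+       Per the check below, modes 0, 1 and 4-7 are always accepted, mode 3
+       only with the floating-point extension, and mode 2 is reserved.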
*/ + if (m3 == 2 || m3 > 7 || (!fpe && m3 == 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return NULL; + } + + return tcg_const_i32(deposit32(m3, 4, 4, m4)); +} + +static DisasJumpType op_cfeb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cfeb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cfdb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cfdb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cfxb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cfxb(o->out, cpu_env, o->in1, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cgeb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cgeb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cgdb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cgdb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cgxb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cgxb(o->out, cpu_env, o->in1, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clfeb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_clfeb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clfdb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_clfdb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clfxb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_clfxb(o->out, cpu_env, o->in1, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clgeb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_clgeb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clgdb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_clgdb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clgxb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_clgxb(o->out, cpu_env, o->in1, o->in2, m34); + 
tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cegb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, true, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cegb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_cdgb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, true, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cdgb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_cxgb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, true, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cxgb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_celgb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_celgb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_cdlgb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cdlgb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_cxlgb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cxlgb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_cksm(DisasContext *s, DisasOps *o) +{ + int r2 = get_field(s, r2); + TCGv_i64 len = tcg_temp_new_i64(); + + gen_helper_cksm(len, cpu_env, o->in1, o->in2, regs[r2 + 1]); + set_cc_static(s); + return_low128(o->out); + + tcg_gen_add_i64(regs[r2], regs[r2], len); + tcg_gen_sub_i64(regs[r2 + 1], regs[r2 + 1], len); + tcg_temp_free_i64(len); + + return DISAS_NEXT; +} + +static DisasJumpType op_clc(DisasContext *s, DisasOps *o) +{ + int l = get_field(s, l1); + TCGv_i32 vl; + + switch (l + 1) { + case 1: + tcg_gen_qemu_ld8u(cc_src, o->addr1, get_mem_index(s)); + tcg_gen_qemu_ld8u(cc_dst, o->in2, get_mem_index(s)); + break; + case 2: + tcg_gen_qemu_ld16u(cc_src, o->addr1, get_mem_index(s)); + tcg_gen_qemu_ld16u(cc_dst, o->in2, get_mem_index(s)); + break; + case 4: + tcg_gen_qemu_ld32u(cc_src, o->addr1, get_mem_index(s)); + tcg_gen_qemu_ld32u(cc_dst, o->in2, get_mem_index(s)); + break; + case 8: + tcg_gen_qemu_ld64(cc_src, o->addr1, get_mem_index(s)); + tcg_gen_qemu_ld64(cc_dst, o->in2, get_mem_index(s)); + break; + default: + vl = tcg_const_i32(l); + gen_helper_clc(cc_op, cpu_env, vl, o->addr1, o->in2); + tcg_temp_free_i32(vl); + set_cc_static(s); + return DISAS_NEXT; + } + gen_op_update2_cc_i64(s, CC_OP_LTUGTU_64, cc_src, cc_dst); + return DISAS_NEXT; +} + +static DisasJumpType op_clcl(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r2 = get_field(s, r2); + TCGv_i32 t1, t2; + + /* r1 and r2 must be even. 
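+       (each designates an even/odd register pair holding address and
+       length)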
*/ + if (r1 & 1 || r2 & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t1 = tcg_const_i32(r1); + t2 = tcg_const_i32(r2); + gen_helper_clcl(cc_op, cpu_env, t1, t2); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clcle(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i32 t1, t3; + + /* r1 and r3 must be even. */ + if (r1 & 1 || r3 & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t1 = tcg_const_i32(r1); + t3 = tcg_const_i32(r3); + gen_helper_clcle(cc_op, cpu_env, t1, o->in2, t3); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t3); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clclu(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i32 t1, t3; + + /* r1 and r3 must be even. */ + if (r1 & 1 || r3 & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t1 = tcg_const_i32(r1); + t3 = tcg_const_i32(r3); + gen_helper_clclu(cc_op, cpu_env, t1, o->in2, t3); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t3); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clm(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m3 = tcg_const_i32(get_field(s, m3)); + TCGv_i32 t1 = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t1, o->in1); + gen_helper_clm(cc_op, cpu_env, t1, m3, o->in2); + set_cc_static(s); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(m3); + return DISAS_NEXT; +} + +static DisasJumpType op_clst(DisasContext *s, DisasOps *o) +{ + gen_helper_clst(o->in1, cpu_env, regs[0], o->in1, o->in2); + set_cc_static(s); + return_low128(o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_cps(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_andi_i64(t, o->in1, 0x8000000000000000ull); + tcg_gen_andi_i64(o->out, o->in2, 0x7fffffffffffffffull); + tcg_gen_or_i64(o->out, o->out, t); + tcg_temp_free_i64(t); + return DISAS_NEXT; +} + +static DisasJumpType op_cs(DisasContext *s, DisasOps *o) +{ + int d2 = get_field(s, d2); + int b2 = get_field(s, b2); + TCGv_i64 addr, cc; + + /* Note that in1 = R3 (new value) and + in2 = (zero-extended) R1 (expected value). */ + + addr = get_address(s, 0, b2, d2); + tcg_gen_atomic_cmpxchg_i64(o->out, addr, o->in2, o->in1, + get_mem_index(s), s->insn->data | MO_ALIGN); + tcg_temp_free_i64(addr); + + /* Are the memory and expected values (un)equal? Note that this setcond + produces the output CC value, thus the NE sense of the test. */ + cc = tcg_temp_new_i64(); + tcg_gen_setcond_i64(TCG_COND_NE, cc, o->in2, o->out); + tcg_gen_extrl_i64_i32(cc_op, cc); + tcg_temp_free_i64(cc); + set_cc_static(s); + + return DISAS_NEXT; +} + +static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + int d2 = get_field(s, d2); + int b2 = get_field(s, b2); + DisasJumpType ret = DISAS_NEXT; + TCGv_i64 addr; + TCGv_i32 t_r1, t_r3; + + /* Note that R1:R1+1 = expected value and R3:R3+1 = new value. 
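+       CDSG operates on a full 16 bytes, hence the even/odd pairs and the
+       cmpxchg128 requirement in the parallel case below.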
*/ + addr = get_address(s, 0, b2, d2); + t_r1 = tcg_const_i32(r1); + t_r3 = tcg_const_i32(r3); + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); + } else if (HAVE_CMPXCHG128) { + gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3); + } else { + gen_helper_exit_atomic(cpu_env); + ret = DISAS_NORETURN; + } + tcg_temp_free_i64(addr); + tcg_temp_free_i32(t_r1); + tcg_temp_free_i32(t_r3); + + set_cc_static(s); + return ret; +} + +static DisasJumpType op_csst(DisasContext *s, DisasOps *o) +{ + int r3 = get_field(s, r3); + TCGv_i32 t_r3 = tcg_const_i32(r3); + + if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_csst_parallel(cc_op, cpu_env, t_r3, o->addr1, o->in2); + } else { + gen_helper_csst(cc_op, cpu_env, t_r3, o->addr1, o->in2); + } + tcg_temp_free_i32(t_r3); + + set_cc_static(s); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_csp(DisasContext *s, DisasOps *o) +{ + MemOp mop = s->insn->data; + TCGv_i64 addr, old, cc; + TCGLabel *lab = gen_new_label(); + + /* Note that in1 = R1 (zero-extended expected value), + out = R1 (original reg), out2 = R1+1 (new value). */ + + addr = tcg_temp_new_i64(); + old = tcg_temp_new_i64(); + tcg_gen_andi_i64(addr, o->in2, -1ULL << (mop & MO_SIZE)); + tcg_gen_atomic_cmpxchg_i64(old, addr, o->in1, o->out2, + get_mem_index(s), mop | MO_ALIGN); + tcg_temp_free_i64(addr); + + /* Are the memory and expected values (un)equal? */ + cc = tcg_temp_new_i64(); + tcg_gen_setcond_i64(TCG_COND_NE, cc, o->in1, old); + tcg_gen_extrl_i64_i32(cc_op, cc); + + /* Write back the output now, so that it happens before the + following branch, so that we don't need local temps. */ + if ((mop & MO_SIZE) == MO_32) { + tcg_gen_deposit_i64(o->out, o->out, old, 0, 32); + } else { + tcg_gen_mov_i64(o->out, old); + } + tcg_temp_free_i64(old); + + /* If the comparison was equal, and the LSB of R2 was set, + then we need to flush the TLB (for all cpus). */ + tcg_gen_xori_i64(cc, cc, 1); + tcg_gen_and_i64(cc, cc, o->in2); + tcg_gen_brcondi_i64(TCG_COND_EQ, cc, 0, lab); + tcg_temp_free_i64(cc); + + gen_helper_purge(cpu_env); + gen_set_label(lab); + + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_cvd(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i32 t2 = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t2, o->in1); + gen_helper_cvd(t1, t2); + tcg_temp_free_i32(t2); + tcg_gen_qemu_st64(t1, o->in2, get_mem_index(s)); + tcg_temp_free_i64(t1); + return DISAS_NEXT; +} + +static DisasJumpType op_ct(DisasContext *s, DisasOps *o) +{ + int m3 = get_field(s, m3); + TCGLabel *lab = gen_new_label(); + TCGCond c; + + c = tcg_invert_cond(ltgt_cond[m3]); + if (s->insn->data) { + c = tcg_unsigned_cond(c); + } + tcg_gen_brcond_i64(c, o->in1, o->in2, lab); + + /* Trap. */ + gen_trap(s); + + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_cuXX(DisasContext *s, DisasOps *o) +{ + int m3 = get_field(s, m3); + int r1 = get_field(s, r1); + int r2 = get_field(s, r2); + TCGv_i32 tr1, tr2, chk; + + /* R1 and R2 must both be even. 
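+       (the CU* helpers consume and update address/length register pairs)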
*/ + if ((r1 | r2) & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + if (!s390_has_feat(S390_FEAT_ETF3_ENH)) { + m3 = 0; + } + + tr1 = tcg_const_i32(r1); + tr2 = tcg_const_i32(r2); + chk = tcg_const_i32(m3); + + switch (s->insn->data) { + case 12: + gen_helper_cu12(cc_op, cpu_env, tr1, tr2, chk); + break; + case 14: + gen_helper_cu14(cc_op, cpu_env, tr1, tr2, chk); + break; + case 21: + gen_helper_cu21(cc_op, cpu_env, tr1, tr2, chk); + break; + case 24: + gen_helper_cu24(cc_op, cpu_env, tr1, tr2, chk); + break; + case 41: + gen_helper_cu41(cc_op, cpu_env, tr1, tr2, chk); + break; + case 42: + gen_helper_cu42(cc_op, cpu_env, tr1, tr2, chk); + break; + default: + g_assert_not_reached(); + } + + tcg_temp_free_i32(tr1); + tcg_temp_free_i32(tr2); + tcg_temp_free_i32(chk); + set_cc_static(s); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_diag(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + TCGv_i32 func_code = tcg_const_i32(get_field(s, i2)); + + gen_helper_diag(cpu_env, r1, r3, func_code); + + tcg_temp_free_i32(func_code); + tcg_temp_free_i32(r3); + tcg_temp_free_i32(r1); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_divs32(DisasContext *s, DisasOps *o) +{ + gen_helper_divs32(o->out2, cpu_env, o->in1, o->in2); + return_low128(o->out); + return DISAS_NEXT; +} + +static DisasJumpType op_divu32(DisasContext *s, DisasOps *o) +{ + gen_helper_divu32(o->out2, cpu_env, o->in1, o->in2); + return_low128(o->out); + return DISAS_NEXT; +} + +static DisasJumpType op_divs64(DisasContext *s, DisasOps *o) +{ + gen_helper_divs64(o->out2, cpu_env, o->in1, o->in2); + return_low128(o->out); + return DISAS_NEXT; +} + +static DisasJumpType op_divu64(DisasContext *s, DisasOps *o) +{ + gen_helper_divu64(o->out2, cpu_env, o->out, o->out2, o->in2); + return_low128(o->out); + return DISAS_NEXT; +} + +static DisasJumpType op_deb(DisasContext *s, DisasOps *o) +{ + gen_helper_deb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_ddb(DisasContext *s, DisasOps *o) +{ + gen_helper_ddb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_dxb(DisasContext *s, DisasOps *o) +{ + gen_helper_dxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_ear(DisasContext *s, DisasOps *o) +{ + int r2 = get_field(s, r2); + tcg_gen_ld32u_i64(o->out, cpu_env, offsetof(CPUS390XState, aregs[r2])); + return DISAS_NEXT; +} + +static DisasJumpType op_ecag(DisasContext *s, DisasOps *o) +{ + /* No cache information provided. */ + tcg_gen_movi_i64(o->out, -1); + return DISAS_NEXT; +} + +static DisasJumpType op_efpc(DisasContext *s, DisasOps *o) +{ + tcg_gen_ld32u_i64(o->out, cpu_env, offsetof(CPUS390XState, fpc)); + return DISAS_NEXT; +} + +static DisasJumpType op_epsw(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r2 = get_field(s, r2); + TCGv_i64 t = tcg_temp_new_i64(); + + /* Note the "subsequently" in the PoO, which implies a defined result + if r1 == r2. Thus we cannot defer these writes to an output hook. 
*/ + tcg_gen_shri_i64(t, psw_mask, 32); + store_reg32_i64(r1, t); + if (r2 != 0) { + store_reg32_i64(r2, psw_mask); + } + + tcg_temp_free_i64(t); + return DISAS_NEXT; +} + +static DisasJumpType op_ex(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + TCGv_i32 ilen; + TCGv_i64 v1; + + /* Nested EXECUTE is not allowed. */ + if (unlikely(s->ex_value)) { + gen_program_exception(s, PGM_EXECUTE); + return DISAS_NORETURN; + } + + update_psw_addr(s); + update_cc_op(s); + + if (r1 == 0) { + v1 = tcg_const_i64(0); + } else { + v1 = regs[r1]; + } + + ilen = tcg_const_i32(s->ilen); + gen_helper_ex(cpu_env, ilen, v1, o->in2); + tcg_temp_free_i32(ilen); + + if (r1 == 0) { + tcg_temp_free_i64(v1); + } + + return DISAS_PC_CC_UPDATED; +} + +static DisasJumpType op_fieb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_fieb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_fidb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_fidb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_fixb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_fixb(o->out, cpu_env, o->in1, o->in2, m34); + return_low128(o->out2); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_flogr(DisasContext *s, DisasOps *o) +{ + /* We'll use the original input for cc computation, since we get to + compare that against 0, which ought to be better than comparing + the real output against 64. It also lets cc_dst be a convenient + temporary during our computation. */ + gen_op_update1_cc_i64(s, CC_OP_FLOGR, o->in2); + + /* R1 = IN ? CLZ(IN) : 64. */ + tcg_gen_clzi_i64(o->out, o->in2, 64); + + /* R1+1 = IN & ~(found bit). Note that we may attempt to shift this + value by 64, which is undefined. But since the shift is 64 iff the + input is zero, we still get the correct result after and'ing. */ + tcg_gen_movi_i64(o->out2, 0x8000000000000000ull); + tcg_gen_shr_i64(o->out2, o->out2, o->out); + tcg_gen_andc_i64(o->out2, cc_dst, o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_icm(DisasContext *s, DisasOps *o) +{ + int m3 = get_field(s, m3); + int pos, len, base = s->insn->data; + TCGv_i64 tmp = tcg_temp_new_i64(); + uint64_t ccm; + + switch (m3) { + case 0xf: + /* Effectively a 32-bit load. */ + tcg_gen_qemu_ld32u(tmp, o->in2, get_mem_index(s)); + len = 32; + goto one_insert; + + case 0xc: + case 0x6: + case 0x3: + /* Effectively a 16-bit load. */ + tcg_gen_qemu_ld16u(tmp, o->in2, get_mem_index(s)); + len = 16; + goto one_insert; + + case 0x8: + case 0x4: + case 0x2: + case 0x1: + /* Effectively an 8-bit load. */ + tcg_gen_qemu_ld8u(tmp, o->in2, get_mem_index(s)); + len = 8; + goto one_insert; + + one_insert: + pos = base + ctz32(m3) * 8; + tcg_gen_deposit_i64(o->out, o->out, tmp, pos, len); + ccm = ((1ull << len) - 1) << pos; + break; + + default: + /* This is going to be a sequence of loads and inserts. 
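+ Walk the mask from its msb: every set bit loads one byte, bumps the address, and deposits the byte 8 bits below the previous one, while ccm accumulates which output bits feed the CC.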
*/ + pos = base + 32 - 8; + ccm = 0; + while (m3) { + if (m3 & 0x8) { + tcg_gen_qemu_ld8u(tmp, o->in2, get_mem_index(s)); + tcg_gen_addi_i64(o->in2, o->in2, 1); + tcg_gen_deposit_i64(o->out, o->out, tmp, pos, 8); + ccm |= 0xffull << pos; + } + m3 = (m3 << 1) & 0xf; + pos -= 8; + } + break; + } + + tcg_gen_movi_i64(tmp, ccm); + gen_op_update2_cc_i64(s, CC_OP_ICM, tmp, o->out); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_insi(DisasContext *s, DisasOps *o) +{ + int shift = s->insn->data & 0xff; + int size = s->insn->data >> 8; + tcg_gen_deposit_i64(o->out, o->in1, o->in2, shift, size); + return DISAS_NEXT; +} + +static DisasJumpType op_ipm(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t1, t2; + + gen_op_calc_cc(s); + t1 = tcg_temp_new_i64(); + tcg_gen_extract_i64(t1, psw_mask, 40, 4); + t2 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(t2, cc_op); + tcg_gen_deposit_i64(t1, t1, t2, 4, 60); + tcg_gen_deposit_i64(o->out, o->out, t1, 24, 8); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_idte(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m4; + + if (s390_has_feat(S390_FEAT_LOCAL_TLB_CLEARING)) { + m4 = tcg_const_i32(get_field(s, m4)); + } else { + m4 = tcg_const_i32(0); + } + gen_helper_idte(cpu_env, o->in1, o->in2, m4); + tcg_temp_free_i32(m4); + return DISAS_NEXT; +} + +static DisasJumpType op_ipte(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m4; + + if (s390_has_feat(S390_FEAT_LOCAL_TLB_CLEARING)) { + m4 = tcg_const_i32(get_field(s, m4)); + } else { + m4 = tcg_const_i32(0); + } + gen_helper_ipte(cpu_env, o->in1, o->in2, m4); + tcg_temp_free_i32(m4); + return DISAS_NEXT; +} + +static DisasJumpType op_iske(DisasContext *s, DisasOps *o) +{ + gen_helper_iske(o->out, cpu_env, o->in2); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_msa(DisasContext *s, DisasOps *o) +{ + int r1 = have_field(s, r1) ? get_field(s, r1) : 0; + int r2 = have_field(s, r2) ? get_field(s, r2) : 0; + int r3 = have_field(s, r3) ? get_field(s, r3) : 0; + TCGv_i32 t_r1, t_r2, t_r3, type; + + switch (s->insn->data) { + case S390_FEAT_TYPE_KMA: + if (r3 == r1 || r3 == r2) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + /* FALL THROUGH */ + case S390_FEAT_TYPE_KMCTR: + if (r3 & 1 || !r3) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + /* FALL THROUGH */ + case S390_FEAT_TYPE_PPNO: + case S390_FEAT_TYPE_KMF: + case S390_FEAT_TYPE_KMC: + case S390_FEAT_TYPE_KMO: + case S390_FEAT_TYPE_KM: + if (r1 & 1 || !r1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + /* FALL THROUGH */ + case S390_FEAT_TYPE_KMAC: + case S390_FEAT_TYPE_KIMD: + case S390_FEAT_TYPE_KLMD: + if (r2 & 1 || !r2) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + /* FALL THROUGH */ + case S390_FEAT_TYPE_PCKMO: + case S390_FEAT_TYPE_PCC: + break; + default: + g_assert_not_reached(); + } + + t_r1 = tcg_const_i32(r1); + t_r2 = tcg_const_i32(r2); + t_r3 = tcg_const_i32(r3); + type = tcg_const_i32(s->insn->data); + gen_helper_msa(cc_op, cpu_env, t_r1, t_r2, t_r3, type); + set_cc_static(s); + tcg_temp_free_i32(t_r1); + tcg_temp_free_i32(t_r2); + tcg_temp_free_i32(t_r3); + tcg_temp_free_i32(type); + return DISAS_NEXT; +} + +static DisasJumpType op_keb(DisasContext *s, DisasOps *o) +{ + gen_helper_keb(cc_op, cpu_env, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_kdb(DisasContext *s, DisasOps *o) +{ + gen_helper_kdb(cc_op, cpu_env, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_kxb(DisasContext *s, DisasOps *o) +{ + gen_helper_kxb(cc_op, cpu_env, o->out, o->out2, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_laa(DisasContext *s, DisasOps *o) +{ + /* The real output is indeed the original value in memory; + recompute the addition for the computation of CC. */ + tcg_gen_atomic_fetch_add_i64(o->in2, o->in2, o->in1, get_mem_index(s), + s->insn->data | MO_ALIGN); + /* However, we need to recompute the addition for setting CC. */ + tcg_gen_add_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_lan(DisasContext *s, DisasOps *o) +{ + /* The real output is indeed the original value in memory; + recompute the AND for the computation of CC. */ + tcg_gen_atomic_fetch_and_i64(o->in2, o->in2, o->in1, get_mem_index(s), + s->insn->data | MO_ALIGN); + /* However, we need to recompute the operation for setting CC. */ + tcg_gen_and_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_lao(DisasContext *s, DisasOps *o) +{ + /* The real output is indeed the original value in memory; + recompute the OR for the computation of CC. */ + tcg_gen_atomic_fetch_or_i64(o->in2, o->in2, o->in1, get_mem_index(s), + s->insn->data | MO_ALIGN); + /* However, we need to recompute the operation for setting CC. */ + tcg_gen_or_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_lax(DisasContext *s, DisasOps *o) +{ + /* The real output is indeed the original value in memory; + recompute the XOR for the computation of CC. */ + tcg_gen_atomic_fetch_xor_i64(o->in2, o->in2, o->in1, get_mem_index(s), + s->insn->data | MO_ALIGN); + /* However, we need to recompute the operation for setting CC.
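+ The atomic op left the pre-existing memory value in in2, so in1 ^ in2 equals the value that was stored back.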
*/ + tcg_gen_xor_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_ldeb(DisasContext *s, DisasOps *o) +{ + gen_helper_ldeb(o->out, cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_ledb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, true, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_ledb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_ldxb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, true, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_ldxb(o->out, cpu_env, o->in1, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_lexb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, true, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_lexb(o->out, cpu_env, o->in1, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_lxdb(DisasContext *s, DisasOps *o) +{ + gen_helper_lxdb(o->out, cpu_env, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_lxeb(DisasContext *s, DisasOps *o) +{ + gen_helper_lxeb(o->out, cpu_env, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_lde(DisasContext *s, DisasOps *o) +{ + tcg_gen_shli_i64(o->out, o->in2, 32); + return DISAS_NEXT; +} + +static DisasJumpType op_llgt(DisasContext *s, DisasOps *o) +{ + tcg_gen_andi_i64(o->out, o->in2, 0x7fffffff); + return DISAS_NEXT; +} + +static DisasJumpType op_ld8s(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld8s(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_ld8u(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld8u(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_ld16s(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld16s(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_ld16u(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld16u(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_ld32s(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld32s(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_ld32u(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld32u(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_ld64(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld64(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_lat(DisasContext *s, DisasOps *o) +{ + TCGLabel *lab = gen_new_label(); + store_reg32_i64(get_field(s, r1), o->in2); + /* The value is stored even in case of trap. */ + tcg_gen_brcondi_i64(TCG_COND_NE, o->in2, 0, lab); + gen_trap(s); + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_lgat(DisasContext *s, DisasOps *o) +{ + TCGLabel *lab = gen_new_label(); + tcg_gen_qemu_ld64(o->out, o->in2, get_mem_index(s)); + /* The value is stored even in case of trap. */ + tcg_gen_brcondi_i64(TCG_COND_NE, o->out, 0, lab); + gen_trap(s); + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_lfhat(DisasContext *s, DisasOps *o) +{ + TCGLabel *lab = gen_new_label(); + store_reg32h_i64(get_field(s, r1), o->in2); + /* The value is stored even in case of trap. 
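+ Only a loaded value of zero then takes the trap, which is delivered as a data exception.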
*/ + tcg_gen_brcondi_i64(TCG_COND_NE, o->in2, 0, lab); + gen_trap(s); + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_llgfat(DisasContext *s, DisasOps *o) +{ + TCGLabel *lab = gen_new_label(); + tcg_gen_qemu_ld32u(o->out, o->in2, get_mem_index(s)); + /* The value is stored even in case of trap. */ + tcg_gen_brcondi_i64(TCG_COND_NE, o->out, 0, lab); + gen_trap(s); + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_llgtat(DisasContext *s, DisasOps *o) +{ + TCGLabel *lab = gen_new_label(); + tcg_gen_andi_i64(o->out, o->in2, 0x7fffffff); + /* The value is stored even in case of trap. */ + tcg_gen_brcondi_i64(TCG_COND_NE, o->out, 0, lab); + gen_trap(s); + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_loc(DisasContext *s, DisasOps *o) +{ + DisasCompare c; + + disas_jcc(s, &c, get_field(s, m3)); + + if (c.is_64) { + tcg_gen_movcond_i64(c.cond, o->out, c.u.s64.a, c.u.s64.b, + o->in2, o->in1); + free_compare(&c); + } else { + TCGv_i32 t32 = tcg_temp_new_i32(); + TCGv_i64 t, z; + + tcg_gen_setcond_i32(c.cond, t32, c.u.s32.a, c.u.s32.b); + free_compare(&c); + + t = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(t, t32); + tcg_temp_free_i32(t32); + + z = tcg_const_i64(0); + tcg_gen_movcond_i64(TCG_COND_NE, o->out, t, z, o->in2, o->in1); + tcg_temp_free_i64(t); + tcg_temp_free_i64(z); + } + + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_lctl(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_lctl(cpu_env, r1, o->in2, r3); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + /* Exit to main loop to reevaluate s390_cpu_exec_interrupt. */ + return DISAS_PC_STALE_NOCHAIN; +} + +static DisasJumpType op_lctlg(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_lctlg(cpu_env, r1, o->in2, r3); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + /* Exit to main loop to reevaluate s390_cpu_exec_interrupt. */ + return DISAS_PC_STALE_NOCHAIN; +} + +static DisasJumpType op_lra(DisasContext *s, DisasOps *o) +{ + gen_helper_lra(o->out, cpu_env, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_lpp(DisasContext *s, DisasOps *o) +{ + tcg_gen_st_i64(o->in2, cpu_env, offsetof(CPUS390XState, pp)); + return DISAS_NEXT; +} + +static DisasJumpType op_lpsw(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t1, t2; + + per_breaking_event(s); + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), + MO_TEUL | MO_ALIGN_8); + tcg_gen_addi_i64(o->in2, o->in2, 4); + tcg_gen_qemu_ld32u(t2, o->in2, get_mem_index(s)); + /* Convert the 32-bit PSW_MASK into the 64-bit PSW_MASK. 
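+ The short-format mask occupies the first word of the 8-byte PSW, so shifting it left by 32 puts those bits at their 64-bit positions; the second word supplies the instruction address.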
*/ + tcg_gen_shli_i64(t1, t1, 32); + gen_helper_load_psw(cpu_env, t1, t2); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + return DISAS_NORETURN; +} + +static DisasJumpType op_lpswe(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t1, t2; + + per_breaking_event(s); + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), + MO_TEQ | MO_ALIGN_8); + tcg_gen_addi_i64(o->in2, o->in2, 8); + tcg_gen_qemu_ld64(t2, o->in2, get_mem_index(s)); + gen_helper_load_psw(cpu_env, t1, t2); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + return DISAS_NORETURN; +} +#endif + +static DisasJumpType op_lam(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_lam(cpu_env, r1, o->in2, r3); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_lm32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i64 t1, t2; + + /* Only one register to read. */ + t1 = tcg_temp_new_i64(); + if (unlikely(r1 == r3)) { + tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + store_reg32_i64(r1, t1); + tcg_temp_free(t1); + return DISAS_NEXT; + } + + /* First load the values of the first and last registers to trigger + possible page faults. */ + t2 = tcg_temp_new_i64(); + tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + tcg_gen_addi_i64(t2, o->in2, 4 * ((r3 - r1) & 15)); + tcg_gen_qemu_ld32u(t2, t2, get_mem_index(s)); + store_reg32_i64(r1, t1); + store_reg32_i64(r3, t2); + + /* Only two registers to read. */ + if (((r1 + 1) & 15) == r3) { + tcg_temp_free(t2); + tcg_temp_free(t1); + return DISAS_NEXT; + } + + /* Then load the remaining registers. Page fault can't occur. */ + r3 = (r3 - 1) & 15; + tcg_gen_movi_i64(t2, 4); + while (r1 != r3) { + r1 = (r1 + 1) & 15; + tcg_gen_add_i64(o->in2, o->in2, t2); + tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + store_reg32_i64(r1, t1); + } + tcg_temp_free(t2); + tcg_temp_free(t1); + + return DISAS_NEXT; +} + +static DisasJumpType op_lmh(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i64 t1, t2; + + /* Only one register to read. */ + t1 = tcg_temp_new_i64(); + if (unlikely(r1 == r3)) { + tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + store_reg32h_i64(r1, t1); + tcg_temp_free(t1); + return DISAS_NEXT; + } + + /* First load the values of the first and last registers to trigger + possible page faults. */ + t2 = tcg_temp_new_i64(); + tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + tcg_gen_addi_i64(t2, o->in2, 4 * ((r3 - r1) & 15)); + tcg_gen_qemu_ld32u(t2, t2, get_mem_index(s)); + store_reg32h_i64(r1, t1); + store_reg32h_i64(r3, t2); + + /* Only two registers to read. */ + if (((r1 + 1) & 15) == r3) { + tcg_temp_free(t2); + tcg_temp_free(t1); + return DISAS_NEXT; + } + + /* Then load the remaining registers. Page fault can't occur. */ + r3 = (r3 - 1) & 15; + tcg_gen_movi_i64(t2, 4); + while (r1 != r3) { + r1 = (r1 + 1) & 15; + tcg_gen_add_i64(o->in2, o->in2, t2); + tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + store_reg32h_i64(r1, t1); + } + tcg_temp_free(t2); + tcg_temp_free(t1); + + return DISAS_NEXT; +} + +static DisasJumpType op_lm64(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i64 t1, t2; + + /* Only one register to read. 
*/ + if (unlikely(r1 == r3)) { + tcg_gen_qemu_ld64(regs[r1], o->in2, get_mem_index(s)); + return DISAS_NEXT; + } + + /* First load the values of the first and last registers to trigger + possible page faults. */ + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_qemu_ld64(t1, o->in2, get_mem_index(s)); + tcg_gen_addi_i64(t2, o->in2, 8 * ((r3 - r1) & 15)); + tcg_gen_qemu_ld64(regs[r3], t2, get_mem_index(s)); + tcg_gen_mov_i64(regs[r1], t1); + tcg_temp_free(t2); + + /* Only two registers to read. */ + if (((r1 + 1) & 15) == r3) { + tcg_temp_free(t1); + return DISAS_NEXT; + } + + /* Then load the remaining registers. Page fault can't occur. */ + r3 = (r3 - 1) & 15; + tcg_gen_movi_i64(t1, 8); + while (r1 != r3) { + r1 = (r1 + 1) & 15; + tcg_gen_add_i64(o->in2, o->in2, t1); + tcg_gen_qemu_ld64(regs[r1], o->in2, get_mem_index(s)); + } + tcg_temp_free(t1); + + return DISAS_NEXT; +} + +static DisasJumpType op_lpd(DisasContext *s, DisasOps *o) +{ + TCGv_i64 a1, a2; + MemOp mop = s->insn->data; + + /* In a parallel context, stop the world and single step. */ + if (tb_cflags(s->base.tb) & CF_PARALLEL) { + update_psw_addr(s); + update_cc_op(s); + gen_exception(EXCP_ATOMIC); + return DISAS_NORETURN; + } + + /* In a serial context, perform the two loads ... */ + a1 = get_address(s, 0, get_field(s, b1), get_field(s, d1)); + a2 = get_address(s, 0, get_field(s, b2), get_field(s, d2)); + tcg_gen_qemu_ld_i64(o->out, a1, get_mem_index(s), mop | MO_ALIGN); + tcg_gen_qemu_ld_i64(o->out2, a2, get_mem_index(s), mop | MO_ALIGN); + tcg_temp_free_i64(a1); + tcg_temp_free_i64(a2); + + /* ... and indicate that we performed them while interlocked. */ + gen_op_movi_cc(s, 0); + return DISAS_NEXT; +} + +static DisasJumpType op_lpq(DisasContext *s, DisasOps *o) +{ + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_lpq(o->out, cpu_env, o->in2); + } else if (HAVE_ATOMIC128) { + gen_helper_lpq_parallel(o->out, cpu_env, o->in2); + } else { + gen_helper_exit_atomic(cpu_env); + return DISAS_NORETURN; + } + return_low128(o->out2); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_lura(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld_tl(o->out, o->in2, MMU_REAL_IDX, s->insn->data); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_lzrb(DisasContext *s, DisasOps *o) +{ + tcg_gen_andi_i64(o->out, o->in2, -256); + return DISAS_NEXT; +} + +static DisasJumpType op_lcbb(DisasContext *s, DisasOps *o) +{ + const int64_t block_size = (1ull << (get_field(s, m3) + 6)); + + if (get_field(s, m3) > 6) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tcg_gen_ori_i64(o->addr1, o->addr1, -block_size); + tcg_gen_neg_i64(o->addr1, o->addr1); + tcg_gen_movi_i64(o->out, 16); + tcg_gen_umin_i64(o->out, o->out, o->addr1); + gen_op_update1_cc_i64(s, CC_OP_LCBB, o->out); + return DISAS_NEXT; +} + +static DisasJumpType op_mc(DisasContext *s, DisasOps *o) +{ +#if !defined(CONFIG_USER_ONLY) + TCGv_i32 i2; +#endif + const uint16_t monitor_class = get_field(s, i2); + + if (monitor_class & 0xff00) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + +#if !defined(CONFIG_USER_ONLY) + i2 = tcg_const_i32(monitor_class); + gen_helper_monitor_call(cpu_env, o->addr1, i2); + tcg_temp_free_i32(i2); +#endif + /* Defaults to a NOP. 
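+ In a user-only build there is no monitor support, so once the class check passes the instruction has no effect.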
*/ + return DISAS_NEXT; +} + +static DisasJumpType op_mov2(DisasContext *s, DisasOps *o) +{ + o->out = o->in2; + o->g_out = o->g_in2; + o->in2 = NULL; + o->g_in2 = false; + return DISAS_NEXT; +} + +static DisasJumpType op_mov2e(DisasContext *s, DisasOps *o) +{ + int b2 = get_field(s, b2); + TCGv ar1 = tcg_temp_new_i64(); + + o->out = o->in2; + o->g_out = o->g_in2; + o->in2 = NULL; + o->g_in2 = false; + + switch (s->base.tb->flags & FLAG_MASK_ASC) { + case PSW_ASC_PRIMARY >> FLAG_MASK_PSW_SHIFT: + tcg_gen_movi_i64(ar1, 0); + break; + case PSW_ASC_ACCREG >> FLAG_MASK_PSW_SHIFT: + tcg_gen_movi_i64(ar1, 1); + break; + case PSW_ASC_SECONDARY >> FLAG_MASK_PSW_SHIFT: + if (b2) { + tcg_gen_ld32u_i64(ar1, cpu_env, offsetof(CPUS390XState, aregs[b2])); + } else { + tcg_gen_movi_i64(ar1, 0); + } + break; + case PSW_ASC_HOME >> FLAG_MASK_PSW_SHIFT: + tcg_gen_movi_i64(ar1, 2); + break; + } + + tcg_gen_st32_i64(ar1, cpu_env, offsetof(CPUS390XState, aregs[1])); + tcg_temp_free_i64(ar1); + + return DISAS_NEXT; +} + +static DisasJumpType op_movx(DisasContext *s, DisasOps *o) +{ + o->out = o->in1; + o->out2 = o->in2; + o->g_out = o->g_in1; + o->g_out2 = o->g_in2; + o->in1 = NULL; + o->in2 = NULL; + o->g_in1 = o->g_in2 = false; + return DISAS_NEXT; +} + +static DisasJumpType op_mvc(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_mvc(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_mvcin(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_mvcin(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_mvcl(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r2 = get_field(s, r2); + TCGv_i32 t1, t2; + + /* r1 and r2 must be even. */ + if (r1 & 1 || r2 & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t1 = tcg_const_i32(r1); + t2 = tcg_const_i32(r2); + gen_helper_mvcl(cc_op, cpu_env, t1, t2); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mvcle(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i32 t1, t3; + + /* r1 and r3 must be even. */ + if (r1 & 1 || r3 & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t1 = tcg_const_i32(r1); + t3 = tcg_const_i32(r3); + gen_helper_mvcle(cc_op, cpu_env, t1, o->in2, t3); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t3); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mvclu(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i32 t1, t3; + + /* r1 and r3 must be even. 
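+ As with MVCLE above, each even register carries an address and its odd partner the remaining length.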
*/ + if (r1 & 1 || r3 & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t1 = tcg_const_i32(r1); + t3 = tcg_const_i32(r3); + gen_helper_mvclu(cc_op, cpu_env, t1, o->in2, t3); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t3); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mvcos(DisasContext *s, DisasOps *o) +{ + int r3 = get_field(s, r3); + gen_helper_mvcos(cc_op, cpu_env, o->addr1, o->in2, regs[r3]); + set_cc_static(s); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_mvcp(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, l1); + gen_helper_mvcp(cc_op, cpu_env, regs[r1], o->addr1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mvcs(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, l1); + gen_helper_mvcs(cc_op, cpu_env, regs[r1], o->addr1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_mvn(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_mvn(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_mvo(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_mvo(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_mvpg(DisasContext *s, DisasOps *o) +{ + TCGv_i32 t1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 t2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_mvpg(cc_op, cpu_env, regs[0], t1, t2); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mvst(DisasContext *s, DisasOps *o) +{ + TCGv_i32 t1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 t2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_mvst(cc_op, cpu_env, t1, t2); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mvz(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_mvz(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_mul(DisasContext *s, DisasOps *o) +{ + tcg_gen_mul_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_mul128(DisasContext *s, DisasOps *o) +{ + tcg_gen_mulu2_i64(o->out2, o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_muls128(DisasContext *s, DisasOps *o) +{ + tcg_gen_muls2_i64(o->out2, o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_meeb(DisasContext *s, DisasOps *o) +{ + gen_helper_meeb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_mdeb(DisasContext *s, DisasOps *o) +{ + gen_helper_mdeb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_mdb(DisasContext *s, DisasOps *o) +{ + gen_helper_mdb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_mxb(DisasContext *s, DisasOps *o) +{ + gen_helper_mxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_mxdb(DisasContext *s, DisasOps *o) +{ + gen_helper_mxdb(o->out, cpu_env, o->out, o->out2, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_maeb(DisasContext *s, DisasOps *o) +{ + TCGv_i64 r3 = load_freg32_i64(get_field(s, r3)); + gen_helper_maeb(o->out, cpu_env, o->in1, o->in2, r3); 
+ tcg_temp_free_i64(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_madb(DisasContext *s, DisasOps *o) +{ + TCGv_i64 r3 = load_freg(get_field(s, r3)); + gen_helper_madb(o->out, cpu_env, o->in1, o->in2, r3); + tcg_temp_free_i64(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_mseb(DisasContext *s, DisasOps *o) +{ + TCGv_i64 r3 = load_freg32_i64(get_field(s, r3)); + gen_helper_mseb(o->out, cpu_env, o->in1, o->in2, r3); + tcg_temp_free_i64(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_msdb(DisasContext *s, DisasOps *o) +{ + TCGv_i64 r3 = load_freg(get_field(s, r3)); + gen_helper_msdb(o->out, cpu_env, o->in1, o->in2, r3); + tcg_temp_free_i64(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_nabs(DisasContext *s, DisasOps *o) +{ + TCGv_i64 z, n; + z = tcg_const_i64(0); + n = tcg_temp_new_i64(); + tcg_gen_neg_i64(n, o->in2); + tcg_gen_movcond_i64(TCG_COND_GE, o->out, o->in2, z, n, o->in2); + tcg_temp_free_i64(n); + tcg_temp_free_i64(z); + return DISAS_NEXT; +} + +static DisasJumpType op_nabsf32(DisasContext *s, DisasOps *o) +{ + tcg_gen_ori_i64(o->out, o->in2, 0x80000000ull); + return DISAS_NEXT; +} + +static DisasJumpType op_nabsf64(DisasContext *s, DisasOps *o) +{ + tcg_gen_ori_i64(o->out, o->in2, 0x8000000000000000ull); + return DISAS_NEXT; +} + +static DisasJumpType op_nabsf128(DisasContext *s, DisasOps *o) +{ + tcg_gen_ori_i64(o->out, o->in1, 0x8000000000000000ull); + tcg_gen_mov_i64(o->out2, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_nc(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_nc(cc_op, cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_neg(DisasContext *s, DisasOps *o) +{ + tcg_gen_neg_i64(o->out, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_negf32(DisasContext *s, DisasOps *o) +{ + tcg_gen_xori_i64(o->out, o->in2, 0x80000000ull); + return DISAS_NEXT; +} + +static DisasJumpType op_negf64(DisasContext *s, DisasOps *o) +{ + tcg_gen_xori_i64(o->out, o->in2, 0x8000000000000000ull); + return DISAS_NEXT; +} + +static DisasJumpType op_negf128(DisasContext *s, DisasOps *o) +{ + tcg_gen_xori_i64(o->out, o->in1, 0x8000000000000000ull); + tcg_gen_mov_i64(o->out2, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_oc(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_oc(cc_op, cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_or(DisasContext *s, DisasOps *o) +{ + tcg_gen_or_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_ori(DisasContext *s, DisasOps *o) +{ + int shift = s->insn->data & 0xff; + int size = s->insn->data >> 8; + uint64_t mask = ((1ull << size) - 1) << shift; + + assert(!o->g_in2); + tcg_gen_shli_i64(o->in2, o->in2, shift); + tcg_gen_or_i64(o->out, o->in1, o->in2); + + /* Produce the CC from only the bits manipulated. */ + tcg_gen_andi_i64(cc_dst, o->out, mask); + set_cc_nz_u64(s, cc_dst); + return DISAS_NEXT; +} + +static DisasJumpType op_oi(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + + if (!s390_has_feat(S390_FEAT_INTERLOCKED_ACCESS_2)) { + tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data); + } else { + /* Perform the atomic operation in memory. 
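+ fetch_or hands back the previous memory value in in1, while the interlocked store writes old | in2.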
*/ + tcg_gen_atomic_fetch_or_i64(o->in1, o->addr1, o->in2, get_mem_index(s), + s->insn->data); + } + + /* Recompute also for atomic case: needed for setting CC. */ + tcg_gen_or_i64(o->out, o->in1, o->in2); + + if (!s390_has_feat(S390_FEAT_INTERLOCKED_ACCESS_2)) { + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data); + } + return DISAS_NEXT; +} + +static DisasJumpType op_pack(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_pack(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_pka(DisasContext *s, DisasOps *o) +{ + int l2 = get_field(s, l2) + 1; + TCGv_i32 l; + + /* The length must not exceed 32 bytes. */ + if (l2 > 32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + l = tcg_const_i32(l2); + gen_helper_pka(cpu_env, o->addr1, o->in2, l); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_pku(DisasContext *s, DisasOps *o) +{ + int l2 = get_field(s, l2) + 1; + TCGv_i32 l; + + /* The length must be even and should not exceed 64 bytes. */ + if ((l2 & 1) || (l2 > 64)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + l = tcg_const_i32(l2); + gen_helper_pku(cpu_env, o->addr1, o->in2, l); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_popcnt(DisasContext *s, DisasOps *o) +{ + gen_helper_popcnt(o->out, o->in2); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_ptlb(DisasContext *s, DisasOps *o) +{ + gen_helper_ptlb(cpu_env); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_risbg(DisasContext *s, DisasOps *o) +{ + int i3 = get_field(s, i3); + int i4 = get_field(s, i4); + int i5 = get_field(s, i5); + int do_zero = i4 & 0x80; + uint64_t mask, imask, pmask; + int pos, len, rot; + + /* Adjust the arguments for the specific insn. */ + switch (s->fields.op2) { + case 0x55: /* risbg */ + case 0x59: /* risbgn */ + i3 &= 63; + i4 &= 63; + pmask = ~0; + break; + case 0x5d: /* risbhg */ + i3 &= 31; + i4 &= 31; + pmask = 0xffffffff00000000ull; + break; + case 0x51: /* risblg */ + i3 = (i3 & 31) + 32; + i4 = (i4 & 31) + 32; + pmask = 0x00000000ffffffffull; + break; + default: + g_assert_not_reached(); + } + + /* MASK is the set of bits to be inserted from R2. */ + if (i3 <= i4) { + /* [0...i3---i4...63] */ + mask = (-1ull >> i3) & (-1ull << (63 - i4)); + } else { + /* [0---i4...i3---63] */ + mask = (-1ull >> i3) | (-1ull << (63 - i4)); + } + /* For RISBLG/RISBHG, the wrapping is limited to the high/low doubleword. */ + mask &= pmask; + + /* IMASK is the set of bits to be kept from R1. In the case of the high/low + insns, we need to keep the other half of the register. */ + imask = ~mask | ~pmask; + if (do_zero) { + imask = ~pmask; + } + + len = i4 - i3 + 1; + pos = 63 - i4; + rot = i5 & 63; + + /* In some cases we can implement this with extract. */ + if (imask == 0 && pos == 0 && len > 0 && len <= rot) { + tcg_gen_extract_i64(o->out, o->in2, 64 - rot, len); + return DISAS_NEXT; + } + + /* In some cases we can implement this with deposit. */ + if (len > 0 && (imask == 0 || ~mask == imask)) { + /* Note that we rotate the bits to be inserted to the lsb, not to + the position as described in the PoO. */ + rot = (rot - pos) & 63; + } else { + pos = -1; + } + + /* Rotate the input as necessary. */ + tcg_gen_rotli_i64(o->in2, o->in2, rot); + + /* Insert the selected bits into the output. 
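+ A valid deposit position lets a single deposit (zeroing or merging) do the job; otherwise mask both operands and OR them together.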
*/ + if (pos >= 0) { + if (imask == 0) { + tcg_gen_deposit_z_i64(o->out, o->in2, pos, len); + } else { + tcg_gen_deposit_i64(o->out, o->out, o->in2, pos, len); + } + } else if (imask == 0) { + tcg_gen_andi_i64(o->out, o->in2, mask); + } else { + tcg_gen_andi_i64(o->in2, o->in2, mask); + tcg_gen_andi_i64(o->out, o->out, imask); + tcg_gen_or_i64(o->out, o->out, o->in2); + } + return DISAS_NEXT; +} + +static DisasJumpType op_rosbg(DisasContext *s, DisasOps *o) +{ + int i3 = get_field(s, i3); + int i4 = get_field(s, i4); + int i5 = get_field(s, i5); + uint64_t mask; + + /* If this is a test-only form, arrange to discard the result. */ + if (i3 & 0x80) { + o->out = tcg_temp_new_i64(); + o->g_out = false; + } + + i3 &= 63; + i4 &= 63; + i5 &= 63; + + /* MASK is the set of bits to be operated on from R2. + Take care for I3/I4 wraparound. */ + mask = ~0ull >> i3; + if (i3 <= i4) { + mask ^= ~0ull >> i4 >> 1; + } else { + mask |= ~(~0ull >> i4 >> 1); + } + + /* Rotate the input as necessary. */ + tcg_gen_rotli_i64(o->in2, o->in2, i5); + + /* Operate. */ + switch (s->fields.op2) { + case 0x54: /* AND */ + tcg_gen_ori_i64(o->in2, o->in2, ~mask); + tcg_gen_and_i64(o->out, o->out, o->in2); + break; + case 0x56: /* OR */ + tcg_gen_andi_i64(o->in2, o->in2, mask); + tcg_gen_or_i64(o->out, o->out, o->in2); + break; + case 0x57: /* XOR */ + tcg_gen_andi_i64(o->in2, o->in2, mask); + tcg_gen_xor_i64(o->out, o->out, o->in2); + break; + default: + abort(); + } + + /* Set the CC. */ + tcg_gen_andi_i64(cc_dst, o->out, mask); + set_cc_nz_u64(s, cc_dst); + return DISAS_NEXT; +} + +static DisasJumpType op_rev16(DisasContext *s, DisasOps *o) +{ + tcg_gen_bswap16_i64(o->out, o->in2, TCG_BSWAP_IZ | TCG_BSWAP_OZ); + return DISAS_NEXT; +} + +static DisasJumpType op_rev32(DisasContext *s, DisasOps *o) +{ + tcg_gen_bswap32_i64(o->out, o->in2, TCG_BSWAP_IZ | TCG_BSWAP_OZ); + return DISAS_NEXT; +} + +static DisasJumpType op_rev64(DisasContext *s, DisasOps *o) +{ + tcg_gen_bswap64_i64(o->out, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_rll32(DisasContext *s, DisasOps *o) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 to = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t1, o->in1); + tcg_gen_extrl_i64_i32(t2, o->in2); + tcg_gen_rotl_i32(to, t1, t2); + tcg_gen_extu_i32_i64(o->out, to); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(to); + return DISAS_NEXT; +} + +static DisasJumpType op_rll64(DisasContext *s, DisasOps *o) +{ + tcg_gen_rotl_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_rrbe(DisasContext *s, DisasOps *o) +{ + gen_helper_rrbe(cc_op, cpu_env, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_sacf(DisasContext *s, DisasOps *o) +{ + gen_helper_sacf(cpu_env, o->in2); + /* Addressing mode has changed, so end the block. */ + return DISAS_PC_STALE; +} +#endif + +static DisasJumpType op_sam(DisasContext *s, DisasOps *o) +{ + int sam = s->insn->data; + TCGv_i64 tsam; + uint64_t mask; + + switch (sam) { + case 0: + mask = 0xffffff; + break; + case 1: + mask = 0x7fffffff; + break; + default: + mask = -1; + break; + } + + /* Bizarre but true, we check the address of the current insn for the + specification exception, not the next to be executed. Thus the PoO + documents that Bad Things Happen two bytes before the end. 
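+ For example, switching to 24-bit mode while executing above 16MB leaves address bits outside the new mask and raises the specification exception.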
*/ + if (s->base.pc_next & ~mask) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + s->pc_tmp &= mask; + + tsam = tcg_const_i64(sam); + tcg_gen_deposit_i64(psw_mask, psw_mask, tsam, 31, 2); + tcg_temp_free_i64(tsam); + + /* Always exit the TB, since we (may have) changed execution mode. */ + return DISAS_PC_STALE; +} + +static DisasJumpType op_sar(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + tcg_gen_st32_i64(o->in2, cpu_env, offsetof(CPUS390XState, aregs[r1])); + return DISAS_NEXT; +} + +static DisasJumpType op_seb(DisasContext *s, DisasOps *o) +{ + gen_helper_seb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sdb(DisasContext *s, DisasOps *o) +{ + gen_helper_sdb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sxb(DisasContext *s, DisasOps *o) +{ + gen_helper_sxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_sqeb(DisasContext *s, DisasOps *o) +{ + gen_helper_sqeb(o->out, cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sqdb(DisasContext *s, DisasOps *o) +{ + gen_helper_sqdb(o->out, cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sqxb(DisasContext *s, DisasOps *o) +{ + gen_helper_sqxb(o->out, cpu_env, o->in1, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_servc(DisasContext *s, DisasOps *o) +{ + gen_helper_servc(cc_op, cpu_env, o->in2, o->in1); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_sigp(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_sigp(cc_op, cpu_env, o->in2, r1, r3); + set_cc_static(s); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_soc(DisasContext *s, DisasOps *o) +{ + DisasCompare c; + TCGv_i64 a, h; + TCGLabel *lab; + int r1; + + disas_jcc(s, &c, get_field(s, m3)); + + /* We want to store when the condition is fulfilled, so branch + out when it's not */ + c.cond = tcg_invert_cond(c.cond); + + lab = gen_new_label(); + if (c.is_64) { + tcg_gen_brcond_i64(c.cond, c.u.s64.a, c.u.s64.b, lab); + } else { + tcg_gen_brcond_i32(c.cond, c.u.s32.a, c.u.s32.b, lab); + } + free_compare(&c); + + r1 = get_field(s, r1); + a = get_address(s, 0, get_field(s, b2), get_field(s, d2)); + switch (s->insn->data) { + case 1: /* STOCG */ + tcg_gen_qemu_st64(regs[r1], a, get_mem_index(s)); + break; + case 0: /* STOC */ + tcg_gen_qemu_st32(regs[r1], a, get_mem_index(s)); + break; + case 2: /* STOCFH */ + h = tcg_temp_new_i64(); + tcg_gen_shri_i64(h, regs[r1], 32); + tcg_gen_qemu_st32(h, a, get_mem_index(s)); + tcg_temp_free_i64(h); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_i64(a); + + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_sla(DisasContext *s, DisasOps *o) +{ + uint64_t sign = 1ull << s->insn->data; + enum cc_op cco = s->insn->data == 31 ? CC_OP_SLA_32 : CC_OP_SLA_64; + gen_op_update2_cc_i64(s, cco, o->in1, o->in2); + tcg_gen_shl_i64(o->out, o->in1, o->in2); + /* The arithmetic left shift is curious in that it does not affect + the sign bit. Copy that over from the source unchanged. 
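+ For example, SLA by 1 on 0x80000001 shifts the low word to 0x00000002, and restoring the saved sign bit gives 0x80000002.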
*/ + tcg_gen_andi_i64(o->out, o->out, ~sign); + tcg_gen_andi_i64(o->in1, o->in1, sign); + tcg_gen_or_i64(o->out, o->out, o->in1); + return DISAS_NEXT; +} + +static DisasJumpType op_sll(DisasContext *s, DisasOps *o) +{ + tcg_gen_shl_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sra(DisasContext *s, DisasOps *o) +{ + tcg_gen_sar_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_srl(DisasContext *s, DisasOps *o) +{ + tcg_gen_shr_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sfpc(DisasContext *s, DisasOps *o) +{ + gen_helper_sfpc(cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sfas(DisasContext *s, DisasOps *o) +{ + gen_helper_sfas(cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_srnm(DisasContext *s, DisasOps *o) +{ + /* Bits other than 62 and 63 are ignored. Bit 29 is set to zero. */ + tcg_gen_andi_i64(o->addr1, o->addr1, 0x3ull); + gen_helper_srnm(cpu_env, o->addr1); + return DISAS_NEXT; +} + +static DisasJumpType op_srnmb(DisasContext *s, DisasOps *o) +{ + /* Bits 0-55 are ignored. */ + tcg_gen_andi_i64(o->addr1, o->addr1, 0xffull); + gen_helper_srnm(cpu_env, o->addr1); + return DISAS_NEXT; +} + +static DisasJumpType op_srnmt(DisasContext *s, DisasOps *o) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + + /* Bits other than 61-63 are ignored. */ + tcg_gen_andi_i64(o->addr1, o->addr1, 0x7ull); + + /* No need to call a helper, we don't implement dfp */ + tcg_gen_ld32u_i64(tmp, cpu_env, offsetof(CPUS390XState, fpc)); + tcg_gen_deposit_i64(tmp, tmp, o->addr1, 4, 3); + tcg_gen_st32_i64(tmp, cpu_env, offsetof(CPUS390XState, fpc)); + + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_spm(DisasContext *s, DisasOps *o) +{ + tcg_gen_extrl_i64_i32(cc_op, o->in1); + tcg_gen_extract_i32(cc_op, cc_op, 28, 2); + set_cc_static(s); + + tcg_gen_shri_i64(o->in1, o->in1, 24); + tcg_gen_deposit_i64(psw_mask, psw_mask, o->in1, PSW_SHIFT_MASK_PM, 4); + return DISAS_NEXT; +} + +static DisasJumpType op_ectg(DisasContext *s, DisasOps *o) +{ + int b1 = get_field(s, b1); + int d1 = get_field(s, d1); + int b2 = get_field(s, b2); + int d2 = get_field(s, d2); + int r3 = get_field(s, r3); + TCGv_i64 tmp = tcg_temp_new_i64(); + + /* fetch all operands first */ + o->in1 = tcg_temp_new_i64(); + tcg_gen_addi_i64(o->in1, regs[b1], d1); + o->in2 = tcg_temp_new_i64(); + tcg_gen_addi_i64(o->in2, regs[b2], d2); + o->addr1 = tcg_temp_new_i64(); + gen_addi_and_wrap_i64(s, o->addr1, regs[r3], 0); + + /* load the third operand into r3 before modifying anything */ + tcg_gen_qemu_ld64(regs[r3], o->addr1, get_mem_index(s)); + + /* subtract CPU timer from first operand and store in GR0 */ + gen_helper_stpt(tmp, cpu_env); + tcg_gen_sub_i64(regs[0], o->in1, tmp); + + /* store second operand in GR1 */ + tcg_gen_mov_i64(regs[1], o->in2); + + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_spka(DisasContext *s, DisasOps *o) +{ + tcg_gen_shri_i64(o->in2, o->in2, 4); + tcg_gen_deposit_i64(psw_mask, psw_mask, o->in2, PSW_SHIFT_KEY, 4); + return DISAS_NEXT; +} + +static DisasJumpType op_sske(DisasContext *s, DisasOps *o) +{ + gen_helper_sske(cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_ssm(DisasContext *s, DisasOps *o) +{ + tcg_gen_deposit_i64(psw_mask, psw_mask, o->in2, 56, 8); + /* Exit to main loop to reevaluate s390_cpu_exec_interrupt.
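+ The new system mask may unmask a pending interrupt, so the TB must not chain straight into the next one.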
*/ + return DISAS_PC_STALE_NOCHAIN; +} + +static DisasJumpType op_stap(DisasContext *s, DisasOps *o) +{ + tcg_gen_ld32u_i64(o->out, cpu_env, offsetof(CPUS390XState, core_id)); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_stck(DisasContext *s, DisasOps *o) +{ + gen_helper_stck(o->out, cpu_env); + /* ??? We don't implement clock states. */ + gen_op_movi_cc(s, 0); + return DISAS_NEXT; +} + +static DisasJumpType op_stcke(DisasContext *s, DisasOps *o) +{ + TCGv_i64 c1 = tcg_temp_new_i64(); + TCGv_i64 c2 = tcg_temp_new_i64(); + TCGv_i64 todpr = tcg_temp_new_i64(); + gen_helper_stck(c1, cpu_env); + /* 16-bit value stored in a uint32_t (only valid bits set) */ + tcg_gen_ld32u_i64(todpr, cpu_env, offsetof(CPUS390XState, todpr)); + /* Shift the 64-bit value into its place as a zero-extended + 104-bit value. Note that "bit positions 64-103 are always + non-zero so that they compare differently to STCK"; we set + the least significant bit to 1. */ + tcg_gen_shli_i64(c2, c1, 56); + tcg_gen_shri_i64(c1, c1, 8); + tcg_gen_ori_i64(c2, c2, 0x10000); + tcg_gen_or_i64(c2, c2, todpr); + tcg_gen_qemu_st64(c1, o->in2, get_mem_index(s)); + tcg_gen_addi_i64(o->in2, o->in2, 8); + tcg_gen_qemu_st64(c2, o->in2, get_mem_index(s)); + tcg_temp_free_i64(c1); + tcg_temp_free_i64(c2); + tcg_temp_free_i64(todpr); + /* ??? We don't implement clock states. */ + gen_op_movi_cc(s, 0); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_sck(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld_i64(o->in1, o->addr1, get_mem_index(s), MO_TEQ | MO_ALIGN); + gen_helper_sck(cc_op, cpu_env, o->in1); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_sckc(DisasContext *s, DisasOps *o) +{ + gen_helper_sckc(cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sckpf(DisasContext *s, DisasOps *o) +{ + gen_helper_sckpf(cpu_env, regs[0]); + return DISAS_NEXT; +} + +static DisasJumpType op_stckc(DisasContext *s, DisasOps *o) +{ + gen_helper_stckc(o->out, cpu_env); + return DISAS_NEXT; +} + +static DisasJumpType op_stctg(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_stctg(cpu_env, r1, o->in2, r3); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_stctl(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_stctl(cpu_env, r1, o->in2, r3); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_stidp(DisasContext *s, DisasOps *o) +{ + tcg_gen_ld_i64(o->out, cpu_env, offsetof(CPUS390XState, cpuid)); + return DISAS_NEXT; +} + +static DisasJumpType op_spt(DisasContext *s, DisasOps *o) +{ + gen_helper_spt(cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_stfl(DisasContext *s, DisasOps *o) +{ + gen_helper_stfl(cpu_env); + return DISAS_NEXT; +} + +static DisasJumpType op_stpt(DisasContext *s, DisasOps *o) +{ + gen_helper_stpt(o->out, cpu_env); + return DISAS_NEXT; +} + +static DisasJumpType op_stsi(DisasContext *s, DisasOps *o) +{ + gen_helper_stsi(cc_op, cpu_env, o->in2, regs[0], regs[1]); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_spx(DisasContext *s, DisasOps *o) +{ + gen_helper_spx(cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_xsch(DisasContext *s, DisasOps *o) +{ + gen_helper_xsch(cpu_env, regs[1]); + set_cc_static(s); + return
DISAS_NEXT; +} + +static DisasJumpType op_csch(DisasContext *s, DisasOps *o) +{ + gen_helper_csch(cpu_env, regs[1]); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_hsch(DisasContext *s, DisasOps *o) +{ + gen_helper_hsch(cpu_env, regs[1]); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_msch(DisasContext *s, DisasOps *o) +{ + gen_helper_msch(cpu_env, regs[1], o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_rchp(DisasContext *s, DisasOps *o) +{ + gen_helper_rchp(cpu_env, regs[1]); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_rsch(DisasContext *s, DisasOps *o) +{ + gen_helper_rsch(cpu_env, regs[1]); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_sal(DisasContext *s, DisasOps *o) +{ + gen_helper_sal(cpu_env, regs[1]); + return DISAS_NEXT; +} + +static DisasJumpType op_schm(DisasContext *s, DisasOps *o) +{ + gen_helper_schm(cpu_env, regs[1], regs[2], o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_siga(DisasContext *s, DisasOps *o) +{ + /* From KVM code: Not provided, set CC = 3 for subchannel not operational */ + gen_op_movi_cc(s, 3); + return DISAS_NEXT; +} + +static DisasJumpType op_stcps(DisasContext *s, DisasOps *o) +{ + /* The instruction is suppressed if not provided. */ + return DISAS_NEXT; +} + +static DisasJumpType op_ssch(DisasContext *s, DisasOps *o) +{ + gen_helper_ssch(cpu_env, regs[1], o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_stsch(DisasContext *s, DisasOps *o) +{ + gen_helper_stsch(cpu_env, regs[1], o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_stcrw(DisasContext *s, DisasOps *o) +{ + gen_helper_stcrw(cpu_env, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tpi(DisasContext *s, DisasOps *o) +{ + gen_helper_tpi(cc_op, cpu_env, o->addr1); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tsch(DisasContext *s, DisasOps *o) +{ + gen_helper_tsch(cpu_env, regs[1], o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_chsc(DisasContext *s, DisasOps *o) +{ + gen_helper_chsc(cpu_env, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_stpx(DisasContext *s, DisasOps *o) +{ + tcg_gen_ld_i64(o->out, cpu_env, offsetof(CPUS390XState, psa)); + tcg_gen_andi_i64(o->out, o->out, 0x7fffe000); + return DISAS_NEXT; +} + +static DisasJumpType op_stnosm(DisasContext *s, DisasOps *o) +{ + uint64_t i2 = get_field(s, i2); + TCGv_i64 t; + + /* It is important to do what the instruction name says: STORE THEN. + If we let the output hook perform the store then if we fault and + restart, we'll have the wrong SYSTEM MASK in place. */ + t = tcg_temp_new_i64(); + tcg_gen_shri_i64(t, psw_mask, 56); + tcg_gen_qemu_st8(t, o->addr1, get_mem_index(s)); + tcg_temp_free_i64(t); + + if (s->fields.op == 0xac) { + tcg_gen_andi_i64(psw_mask, psw_mask, + (i2 << 56) | 0x00ffffffffffffffull); + } else { + tcg_gen_ori_i64(psw_mask, psw_mask, i2 << 56); + } + + /* Exit to main loop to reevaluate s390_cpu_exec_interrupt. 
*/ + return DISAS_PC_STALE_NOCHAIN; +} + +static DisasJumpType op_stura(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st_tl(o->in1, o->in2, MMU_REAL_IDX, s->insn->data); + + if (s->base.tb->flags & FLAG_MASK_PER) { + update_psw_addr(s); + gen_helper_per_store_real(cpu_env); + } + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_stfle(DisasContext *s, DisasOps *o) +{ + gen_helper_stfle(cc_op, cpu_env, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_st8(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st8(o->in1, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_st16(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st16(o->in1, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_st32(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st32(o->in1, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_st64(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st64(o->in1, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_stam(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_stam(cpu_env, r1, o->in2, r3); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_stcm(DisasContext *s, DisasOps *o) +{ + int m3 = get_field(s, m3); + int pos, base = s->insn->data; + TCGv_i64 tmp = tcg_temp_new_i64(); + + pos = base + ctz32(m3) * 8; + switch (m3) { + case 0xf: + /* Effectively a 32-bit store. */ + tcg_gen_shri_i64(tmp, o->in1, pos); + tcg_gen_qemu_st32(tmp, o->in2, get_mem_index(s)); + break; + + case 0xc: + case 0x6: + case 0x3: + /* Effectively a 16-bit store. */ + tcg_gen_shri_i64(tmp, o->in1, pos); + tcg_gen_qemu_st16(tmp, o->in2, get_mem_index(s)); + break; + + case 0x8: + case 0x4: + case 0x2: + case 0x1: + /* Effectively an 8-bit store. */ + tcg_gen_shri_i64(tmp, o->in1, pos); + tcg_gen_qemu_st8(tmp, o->in2, get_mem_index(s)); + break; + + default: + /* This is going to be a sequence of shifts and stores. 
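+ The mirror image of ICM above: walk the mask from its msb, storing one byte from successively lower positions of r1 for every set bit.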
*/ + pos = base + 32 - 8; + while (m3) { + if (m3 & 0x8) { + tcg_gen_shri_i64(tmp, o->in1, pos); + tcg_gen_qemu_st8(tmp, o->in2, get_mem_index(s)); + tcg_gen_addi_i64(o->in2, o->in2, 1); + } + m3 = (m3 << 1) & 0xf; + pos -= 8; + } + break; + } + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_stm(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + int size = s->insn->data; + TCGv_i64 tsize = tcg_const_i64(size); + + while (1) { + if (size == 8) { + tcg_gen_qemu_st64(regs[r1], o->in2, get_mem_index(s)); + } else { + tcg_gen_qemu_st32(regs[r1], o->in2, get_mem_index(s)); + } + if (r1 == r3) { + break; + } + tcg_gen_add_i64(o->in2, o->in2, tsize); + r1 = (r1 + 1) & 15; + } + + tcg_temp_free_i64(tsize); + return DISAS_NEXT; +} + +static DisasJumpType op_stmh(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i64 t = tcg_temp_new_i64(); + TCGv_i64 t4 = tcg_const_i64(4); + TCGv_i64 t32 = tcg_const_i64(32); + + while (1) { + tcg_gen_shl_i64(t, regs[r1], t32); + tcg_gen_qemu_st32(t, o->in2, get_mem_index(s)); + if (r1 == r3) { + break; + } + tcg_gen_add_i64(o->in2, o->in2, t4); + r1 = (r1 + 1) & 15; + } + + tcg_temp_free_i64(t); + tcg_temp_free_i64(t4); + tcg_temp_free_i64(t32); + return DISAS_NEXT; +} + +static DisasJumpType op_stpq(DisasContext *s, DisasOps *o) +{ + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); + } else if (HAVE_ATOMIC128) { + gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out); + } else { + gen_helper_exit_atomic(cpu_env); + return DISAS_NORETURN; + } + return DISAS_NEXT; +} + +static DisasJumpType op_srst(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_srst(cpu_env, r1, r2); + + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_srstu(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_srstu(cpu_env, r1, r2); + + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_sub(DisasContext *s, DisasOps *o) +{ + tcg_gen_sub_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_subu64(DisasContext *s, DisasOps *o) +{ + tcg_gen_movi_i64(cc_src, 0); + tcg_gen_sub2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src); + return DISAS_NEXT; +} + +/* Compute borrow (0, -1) into cc_src. */ +static void compute_borrow(DisasContext *s) +{ + switch (s->cc_op) { + case CC_OP_SUBU: + /* The borrow value is already in cc_src (0,-1). */ + break; + default: + gen_op_calc_cc(s); + /* fall through */ + case CC_OP_STATIC: + /* The carry flag is the msb of CC; compute into cc_src. */ + tcg_gen_extu_i32_i64(cc_src, cc_op); + tcg_gen_shri_i64(cc_src, cc_src, 1); + /* fall through */ + case CC_OP_ADDU: + /* Convert carry (1,0) to borrow (0,-1). */ + tcg_gen_subi_i64(cc_src, cc_src, 1); + break; + } +} + +static DisasJumpType op_subb32(DisasContext *s, DisasOps *o) +{ + compute_borrow(s); + + /* Borrow is {0, -1}, so add to subtract. 
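+       (in1 - in2 - borrow_in) == (in1 + borrow) - in2, since the borrow
+       computed above is either 0 or -1.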
*/ + tcg_gen_add_i64(o->out, o->in1, cc_src); + tcg_gen_sub_i64(o->out, o->out, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_subb64(DisasContext *s, DisasOps *o) +{ + compute_borrow(s); + + /* + * Borrow is {0, -1}, so add to subtract; replicate the + * borrow input to produce 128-bit -1 for the addition. + */ + TCGv_i64 zero = tcg_const_i64(0); + tcg_gen_add2_i64(o->out, cc_src, o->in1, zero, cc_src, cc_src); + tcg_gen_sub2_i64(o->out, cc_src, o->out, cc_src, o->in2, zero); + tcg_temp_free_i64(zero); + + return DISAS_NEXT; +} + +static DisasJumpType op_svc(DisasContext *s, DisasOps *o) +{ + TCGv_i32 t; + + update_psw_addr(s); + update_cc_op(s); + + t = tcg_const_i32(get_field(s, i1) & 0xff); + tcg_gen_st_i32(t, cpu_env, offsetof(CPUS390XState, int_svc_code)); + tcg_temp_free_i32(t); + + t = tcg_const_i32(s->ilen); + tcg_gen_st_i32(t, cpu_env, offsetof(CPUS390XState, int_svc_ilen)); + tcg_temp_free_i32(t); + + gen_exception(EXCP_SVC); + return DISAS_NORETURN; +} + +static DisasJumpType op_tam(DisasContext *s, DisasOps *o) +{ + int cc = 0; + + cc |= (s->base.tb->flags & FLAG_MASK_64) ? 2 : 0; + cc |= (s->base.tb->flags & FLAG_MASK_32) ? 1 : 0; + gen_op_movi_cc(s, cc); + return DISAS_NEXT; +} + +static DisasJumpType op_tceb(DisasContext *s, DisasOps *o) +{ + gen_helper_tceb(cc_op, cpu_env, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tcdb(DisasContext *s, DisasOps *o) +{ + gen_helper_tcdb(cc_op, cpu_env, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tcxb(DisasContext *s, DisasOps *o) +{ + gen_helper_tcxb(cc_op, cpu_env, o->out, o->out2, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY + +static DisasJumpType op_testblock(DisasContext *s, DisasOps *o) +{ + gen_helper_testblock(cc_op, cpu_env, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tprot(DisasContext *s, DisasOps *o) +{ + gen_helper_tprot(cc_op, cpu_env, o->addr1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +#endif + +static DisasJumpType op_tp(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l1 = tcg_const_i32(get_field(s, l1) + 1); + gen_helper_tp(cc_op, cpu_env, o->addr1, l1); + tcg_temp_free_i32(l1); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tr(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_tr(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tre(DisasContext *s, DisasOps *o) +{ + gen_helper_tre(o->out, cpu_env, o->out, o->out2, o->in2); + return_low128(o->out2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_trt(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_trt(cc_op, cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_trtr(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_trtr(cc_op, cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_trXX(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + TCGv_i32 sizes = tcg_const_i32(s->insn->opc & 3); + TCGv_i32 tst = tcg_temp_new_i32(); + int m3 = get_field(s, m3); + + if (!s390_has_feat(S390_FEAT_ETF2_ENH)) { + m3 = 0; + } + if (m3 & 
1) { + tcg_gen_movi_i32(tst, -1); + } else { + tcg_gen_extrl_i64_i32(tst, regs[0]); + if (s->insn->opc & 3) { + tcg_gen_ext8u_i32(tst, tst); + } else { + tcg_gen_ext16u_i32(tst, tst); + } + } + gen_helper_trXX(cc_op, cpu_env, r1, r2, tst, sizes); + + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r2); + tcg_temp_free_i32(sizes); + tcg_temp_free_i32(tst); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_ts(DisasContext *s, DisasOps *o) +{ + TCGv_i32 t1 = tcg_const_i32(0xff); + tcg_gen_atomic_xchg_i32(t1, o->in2, t1, get_mem_index(s), MO_UB); + tcg_gen_extract_i32(cc_op, t1, 7, 1); + tcg_temp_free_i32(t1); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_unpk(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_unpk(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_unpka(DisasContext *s, DisasOps *o) +{ + int l1 = get_field(s, l1) + 1; + TCGv_i32 l; + + /* The length must not exceed 32 bytes. */ + if (l1 > 32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + l = tcg_const_i32(l1); + gen_helper_unpka(cc_op, cpu_env, o->addr1, l, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_unpku(DisasContext *s, DisasOps *o) +{ + int l1 = get_field(s, l1) + 1; + TCGv_i32 l; + + /* The length must be even and should not exceed 64 bytes. */ + if ((l1 & 1) || (l1 > 64)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + l = tcg_const_i32(l1); + gen_helper_unpku(cc_op, cpu_env, o->addr1, l, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + + +static DisasJumpType op_xc(DisasContext *s, DisasOps *o) +{ + int d1 = get_field(s, d1); + int d2 = get_field(s, d2); + int b1 = get_field(s, b1); + int b2 = get_field(s, b2); + int l = get_field(s, l1); + TCGv_i32 t32; + + o->addr1 = get_address(s, 0, b1, d1); + + /* If the addresses are identical, this is a store/memset of zero. */ + if (b1 == b2 && d1 == d2 && (l + 1) <= 32) { + o->in2 = tcg_const_i64(0); + + l++; + while (l >= 8) { + tcg_gen_qemu_st64(o->in2, o->addr1, get_mem_index(s)); + l -= 8; + if (l > 0) { + tcg_gen_addi_i64(o->addr1, o->addr1, 8); + } + } + if (l >= 4) { + tcg_gen_qemu_st32(o->in2, o->addr1, get_mem_index(s)); + l -= 4; + if (l > 0) { + tcg_gen_addi_i64(o->addr1, o->addr1, 4); + } + } + if (l >= 2) { + tcg_gen_qemu_st16(o->in2, o->addr1, get_mem_index(s)); + l -= 2; + if (l > 0) { + tcg_gen_addi_i64(o->addr1, o->addr1, 2); + } + } + if (l) { + tcg_gen_qemu_st8(o->in2, o->addr1, get_mem_index(s)); + } + gen_op_movi_cc(s, 0); + return DISAS_NEXT; + } + + /* But in general we'll defer to a helper. */ + o->in2 = get_address(s, 0, b2, d2); + t32 = tcg_const_i32(l); + gen_helper_xc(cc_op, cpu_env, t32, o->addr1, o->in2); + tcg_temp_free_i32(t32); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_xor(DisasContext *s, DisasOps *o) +{ + tcg_gen_xor_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_xori(DisasContext *s, DisasOps *o) +{ + int shift = s->insn->data & 0xff; + int size = s->insn->data >> 8; + uint64_t mask = ((1ull << size) - 1) << shift; + + assert(!o->g_in2); + tcg_gen_shli_i64(o->in2, o->in2, shift); + tcg_gen_xor_i64(o->out, o->in1, o->in2); + + /* Produce the CC from only the bits manipulated. 
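+       insn->data packs (size << 8) | shift, so e.g. a 32-bit low-word
+       variant (size 32, shift 0) yields mask 0xffffffff.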
*/ + tcg_gen_andi_i64(cc_dst, o->out, mask); + set_cc_nz_u64(s, cc_dst); + return DISAS_NEXT; +} + +static DisasJumpType op_xi(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + + if (!s390_has_feat(S390_FEAT_INTERLOCKED_ACCESS_2)) { + tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data); + } else { + /* Perform the atomic operation in memory. */ + tcg_gen_atomic_fetch_xor_i64(o->in1, o->addr1, o->in2, get_mem_index(s), + s->insn->data); + } + + /* Recompute also for atomic case: needed for setting CC. */ + tcg_gen_xor_i64(o->out, o->in1, o->in2); + + if (!s390_has_feat(S390_FEAT_INTERLOCKED_ACCESS_2)) { + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data); + } + return DISAS_NEXT; +} + +static DisasJumpType op_zero(DisasContext *s, DisasOps *o) +{ + o->out = tcg_const_i64(0); + return DISAS_NEXT; +} + +static DisasJumpType op_zero2(DisasContext *s, DisasOps *o) +{ + o->out = tcg_const_i64(0); + o->out2 = o->out; + o->g_out2 = true; + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_clp(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_clp(cpu_env, r2); + tcg_temp_free_i32(r2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_pcilg(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_pcilg(cpu_env, r1, r2); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_pcistg(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_pcistg(cpu_env, r1, r2); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_stpcifc(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 ar = tcg_const_i32(get_field(s, b2)); + + gen_helper_stpcifc(cpu_env, r1, o->addr1, ar); + tcg_temp_free_i32(ar); + tcg_temp_free_i32(r1); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_sic(DisasContext *s, DisasOps *o) +{ + gen_helper_sic(cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_rpcit(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_rpcit(cpu_env, r1, r2); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_pcistb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + TCGv_i32 ar = tcg_const_i32(get_field(s, b2)); + + gen_helper_pcistb(cpu_env, r1, r3, o->addr1, ar); + tcg_temp_free_i32(ar); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mpcifc(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 ar = tcg_const_i32(get_field(s, b2)); + + gen_helper_mpcifc(cpu_env, r1, o->addr1, ar); + tcg_temp_free_i32(ar); + tcg_temp_free_i32(r1); + set_cc_static(s); + return DISAS_NEXT; +} +#endif + +#include "translate_vx.c.inc" + +/* ====================================================================== */ +/* The "Cc OUTput" generators. 
Given the generated output (and in some cases + the original inputs), update the various cc data structures in order to + be able to compute the new condition code. */ + +static void cout_abs32(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_ABS_32, o->out); +} + +static void cout_abs64(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_ABS_64, o->out); +} + +static void cout_adds32(DisasContext *s, DisasOps *o) +{ + gen_op_update3_cc_i64(s, CC_OP_ADD_32, o->in1, o->in2, o->out); +} + +static void cout_adds64(DisasContext *s, DisasOps *o) +{ + gen_op_update3_cc_i64(s, CC_OP_ADD_64, o->in1, o->in2, o->out); +} + +static void cout_addu32(DisasContext *s, DisasOps *o) +{ + tcg_gen_shri_i64(cc_src, o->out, 32); + tcg_gen_ext32u_i64(cc_dst, o->out); + gen_op_update2_cc_i64(s, CC_OP_ADDU, cc_src, cc_dst); +} + +static void cout_addu64(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_ADDU, cc_src, o->out); +} + +static void cout_cmps32(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_LTGT_32, o->in1, o->in2); +} + +static void cout_cmps64(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_LTGT_64, o->in1, o->in2); +} + +static void cout_cmpu32(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_LTUGTU_32, o->in1, o->in2); +} + +static void cout_cmpu64(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_LTUGTU_64, o->in1, o->in2); +} + +static void cout_f32(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_NZ_F32, o->out); +} + +static void cout_f64(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_NZ_F64, o->out); +} + +static void cout_f128(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_NZ_F128, o->out, o->out2); +} + +static void cout_nabs32(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_NABS_32, o->out); +} + +static void cout_nabs64(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_NABS_64, o->out); +} + +static void cout_neg32(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_COMP_32, o->out); +} + +static void cout_neg64(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_COMP_64, o->out); +} + +static void cout_nz32(DisasContext *s, DisasOps *o) +{ + tcg_gen_ext32u_i64(cc_dst, o->out); + gen_op_update1_cc_i64(s, CC_OP_NZ, cc_dst); +} + +static void cout_nz64(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_NZ, o->out); +} + +static void cout_s32(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_LTGT0_32, o->out); +} + +static void cout_s64(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_LTGT0_64, o->out); +} + +static void cout_subs32(DisasContext *s, DisasOps *o) +{ + gen_op_update3_cc_i64(s, CC_OP_SUB_32, o->in1, o->in2, o->out); +} + +static void cout_subs64(DisasContext *s, DisasOps *o) +{ + gen_op_update3_cc_i64(s, CC_OP_SUB_64, o->in1, o->in2, o->out); +} + +static void cout_subu32(DisasContext *s, DisasOps *o) +{ + tcg_gen_sari_i64(cc_src, o->out, 32); + tcg_gen_ext32u_i64(cc_dst, o->out); + gen_op_update2_cc_i64(s, CC_OP_SUBU, cc_src, cc_dst); +} + +static void cout_subu64(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_SUBU, cc_src, o->out); +} + +static void cout_tm32(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_TM_32, o->in1, o->in2); +} + +static void cout_tm64(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_TM_64, o->in1, o->in2); +} + +static void 
cout_muls32(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_MULS_32, o->out); +} + +static void cout_muls64(DisasContext *s, DisasOps *o) +{ + /* out contains "high" part, out2 contains "low" part of 128 bit result */ + gen_op_update2_cc_i64(s, CC_OP_MULS_64, o->out, o->out2); +} + +/* ====================================================================== */ +/* The "PREParation" generators. These initialize the DisasOps.OUT fields + with the TCG register to which we will write. Used in combination with + the "wout" generators, in some cases we need a new temporary, and in + some cases we can write to a TCG global. */ + +static void prep_new(DisasContext *s, DisasOps *o) +{ + o->out = tcg_temp_new_i64(); +} +#define SPEC_prep_new 0 + +static void prep_new_P(DisasContext *s, DisasOps *o) +{ + o->out = tcg_temp_new_i64(); + o->out2 = tcg_temp_new_i64(); +} +#define SPEC_prep_new_P 0 + +static void prep_r1(DisasContext *s, DisasOps *o) +{ + o->out = regs[get_field(s, r1)]; + o->g_out = true; +} +#define SPEC_prep_r1 0 + +static void prep_r1_P(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + o->out = regs[r1]; + o->out2 = regs[r1 + 1]; + o->g_out = o->g_out2 = true; +} +#define SPEC_prep_r1_P SPEC_r1_even + +/* Whenever we need x1 in addition to other inputs, we'll load it to out/out2 */ +static void prep_x1(DisasContext *s, DisasOps *o) +{ + o->out = load_freg(get_field(s, r1)); + o->out2 = load_freg(get_field(s, r1) + 2); +} +#define SPEC_prep_x1 SPEC_r1_f128 + +/* ====================================================================== */ +/* The "Write OUTput" generators. These generally perform some non-trivial + copy of data to TCG globals, or to main memory. The trivial cases are + generally handled by having a "prep" generator install the TCG global + as the destination of the operation. 
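+   Note that translate_one() invokes a wout generator only when the op
+   generator did not return DISAS_NORETURN.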
*/ + +static void wout_r1(DisasContext *s, DisasOps *o) +{ + store_reg(get_field(s, r1), o->out); +} +#define SPEC_wout_r1 0 + +static void wout_out2_r1(DisasContext *s, DisasOps *o) +{ + store_reg(get_field(s, r1), o->out2); +} +#define SPEC_wout_out2_r1 0 + +static void wout_r1_8(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + tcg_gen_deposit_i64(regs[r1], regs[r1], o->out, 0, 8); +} +#define SPEC_wout_r1_8 0 + +static void wout_r1_16(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + tcg_gen_deposit_i64(regs[r1], regs[r1], o->out, 0, 16); +} +#define SPEC_wout_r1_16 0 + +static void wout_r1_32(DisasContext *s, DisasOps *o) +{ + store_reg32_i64(get_field(s, r1), o->out); +} +#define SPEC_wout_r1_32 0 + +static void wout_r1_32h(DisasContext *s, DisasOps *o) +{ + store_reg32h_i64(get_field(s, r1), o->out); +} +#define SPEC_wout_r1_32h 0 + +static void wout_r1_P32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + store_reg32_i64(r1, o->out); + store_reg32_i64(r1 + 1, o->out2); +} +#define SPEC_wout_r1_P32 SPEC_r1_even + +static void wout_r1_D32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + store_reg32_i64(r1 + 1, o->out); + tcg_gen_shri_i64(o->out, o->out, 32); + store_reg32_i64(r1, o->out); +} +#define SPEC_wout_r1_D32 SPEC_r1_even + +static void wout_r3_P32(DisasContext *s, DisasOps *o) +{ + int r3 = get_field(s, r3); + store_reg32_i64(r3, o->out); + store_reg32_i64(r3 + 1, o->out2); +} +#define SPEC_wout_r3_P32 SPEC_r3_even + +static void wout_r3_P64(DisasContext *s, DisasOps *o) +{ + int r3 = get_field(s, r3); + store_reg(r3, o->out); + store_reg(r3 + 1, o->out2); +} +#define SPEC_wout_r3_P64 SPEC_r3_even + +static void wout_e1(DisasContext *s, DisasOps *o) +{ + store_freg32_i64(get_field(s, r1), o->out); +} +#define SPEC_wout_e1 0 + +static void wout_f1(DisasContext *s, DisasOps *o) +{ + store_freg(get_field(s, r1), o->out); +} +#define SPEC_wout_f1 0 + +static void wout_x1(DisasContext *s, DisasOps *o) +{ + int f1 = get_field(s, r1); + store_freg(f1, o->out); + store_freg(f1 + 2, o->out2); +} +#define SPEC_wout_x1 SPEC_r1_f128 + +static void wout_cond_r1r2_32(DisasContext *s, DisasOps *o) +{ + if (get_field(s, r1) != get_field(s, r2)) { + store_reg32_i64(get_field(s, r1), o->out); + } +} +#define SPEC_wout_cond_r1r2_32 0 + +static void wout_cond_e1e2(DisasContext *s, DisasOps *o) +{ + if (get_field(s, r1) != get_field(s, r2)) { + store_freg32_i64(get_field(s, r1), o->out); + } +} +#define SPEC_wout_cond_e1e2 0 + +static void wout_m1_8(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st8(o->out, o->addr1, get_mem_index(s)); +} +#define SPEC_wout_m1_8 0 + +static void wout_m1_16(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st16(o->out, o->addr1, get_mem_index(s)); +} +#define SPEC_wout_m1_16 0 + +#ifndef CONFIG_USER_ONLY +static void wout_m1_16a(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), MO_TEUW | MO_ALIGN); +} +#define SPEC_wout_m1_16a 0 +#endif + +static void wout_m1_32(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st32(o->out, o->addr1, get_mem_index(s)); +} +#define SPEC_wout_m1_32 0 + +#ifndef CONFIG_USER_ONLY +static void wout_m1_32a(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), MO_TEUL | MO_ALIGN); +} +#define SPEC_wout_m1_32a 0 +#endif + +static void wout_m1_64(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st64(o->out, o->addr1, get_mem_index(s)); +} +#define SPEC_wout_m1_64 0 + +#ifndef CONFIG_USER_ONLY +static void 
wout_m1_64a(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st_i64(o->out, o->addr1, get_mem_index(s), MO_TEQ | MO_ALIGN); +} +#define SPEC_wout_m1_64a 0 +#endif + +static void wout_m2_32(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st32(o->out, o->in2, get_mem_index(s)); +} +#define SPEC_wout_m2_32 0 + +static void wout_in2_r1(DisasContext *s, DisasOps *o) +{ + store_reg(get_field(s, r1), o->in2); +} +#define SPEC_wout_in2_r1 0 + +static void wout_in2_r1_32(DisasContext *s, DisasOps *o) +{ + store_reg32_i64(get_field(s, r1), o->in2); +} +#define SPEC_wout_in2_r1_32 0 + +/* ====================================================================== */ +/* The "INput 1" generators. These load the first operand to an insn. */ + +static void in1_r1(DisasContext *s, DisasOps *o) +{ + o->in1 = load_reg(get_field(s, r1)); +} +#define SPEC_in1_r1 0 + +static void in1_r1_o(DisasContext *s, DisasOps *o) +{ + o->in1 = regs[get_field(s, r1)]; + o->g_in1 = true; +} +#define SPEC_in1_r1_o 0 + +static void in1_r1_32s(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(o->in1, regs[get_field(s, r1)]); +} +#define SPEC_in1_r1_32s 0 + +static void in1_r1_32u(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in1, regs[get_field(s, r1)]); +} +#define SPEC_in1_r1_32u 0 + +static void in1_r1_sr32(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_shri_i64(o->in1, regs[get_field(s, r1)], 32); +} +#define SPEC_in1_r1_sr32 0 + +static void in1_r1p1(DisasContext *s, DisasOps *o) +{ + o->in1 = load_reg(get_field(s, r1) + 1); +} +#define SPEC_in1_r1p1 SPEC_r1_even + +static void in1_r1p1_o(DisasContext *s, DisasOps *o) +{ + o->in1 = regs[get_field(s, r1) + 1]; + o->g_in1 = true; +} +#define SPEC_in1_r1p1_o SPEC_r1_even + +static void in1_r1p1_32s(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(o->in1, regs[get_field(s, r1) + 1]); +} +#define SPEC_in1_r1p1_32s SPEC_r1_even + +static void in1_r1p1_32u(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in1, regs[get_field(s, r1) + 1]); +} +#define SPEC_in1_r1p1_32u SPEC_r1_even + +static void in1_r1_D32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + o->in1 = tcg_temp_new_i64(); + tcg_gen_concat32_i64(o->in1, regs[r1 + 1], regs[r1]); +} +#define SPEC_in1_r1_D32 SPEC_r1_even + +static void in1_r2(DisasContext *s, DisasOps *o) +{ + o->in1 = load_reg(get_field(s, r2)); +} +#define SPEC_in1_r2 0 + +static void in1_r2_sr32(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_shri_i64(o->in1, regs[get_field(s, r2)], 32); +} +#define SPEC_in1_r2_sr32 0 + +static void in1_r2_32u(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in1, regs[get_field(s, r2)]); +} +#define SPEC_in1_r2_32u 0 + +static void in1_r3(DisasContext *s, DisasOps *o) +{ + o->in1 = load_reg(get_field(s, r3)); +} +#define SPEC_in1_r3 0 + +static void in1_r3_o(DisasContext *s, DisasOps *o) +{ + o->in1 = regs[get_field(s, r3)]; + o->g_in1 = true; +} +#define SPEC_in1_r3_o 0 + +static void in1_r3_32s(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(o->in1, regs[get_field(s, r3)]); +} +#define SPEC_in1_r3_32s 0 + +static void in1_r3_32u(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in1, regs[get_field(s, r3)]); +} +#define SPEC_in1_r3_32u 0 + +static void in1_r3_D32(DisasContext *s, DisasOps *o) +{ 
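+    /* D32: read an even/odd 32-bit register pair as one 64-bit value. */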
+ int r3 = get_field(s, r3); + o->in1 = tcg_temp_new_i64(); + tcg_gen_concat32_i64(o->in1, regs[r3 + 1], regs[r3]); +} +#define SPEC_in1_r3_D32 SPEC_r3_even + +static void in1_e1(DisasContext *s, DisasOps *o) +{ + o->in1 = load_freg32_i64(get_field(s, r1)); +} +#define SPEC_in1_e1 0 + +static void in1_f1(DisasContext *s, DisasOps *o) +{ + o->in1 = load_freg(get_field(s, r1)); +} +#define SPEC_in1_f1 0 + +/* Load the high double word of an extended (128-bit) format FP number */ +static void in1_x2h(DisasContext *s, DisasOps *o) +{ + o->in1 = load_freg(get_field(s, r2)); +} +#define SPEC_in1_x2h SPEC_r2_f128 + +static void in1_f3(DisasContext *s, DisasOps *o) +{ + o->in1 = load_freg(get_field(s, r3)); +} +#define SPEC_in1_f3 0 + +static void in1_la1(DisasContext *s, DisasOps *o) +{ + o->addr1 = get_address(s, 0, get_field(s, b1), get_field(s, d1)); +} +#define SPEC_in1_la1 0 + +static void in1_la2(DisasContext *s, DisasOps *o) +{ + int x2 = have_field(s, x2) ? get_field(s, x2) : 0; + o->addr1 = get_address(s, x2, get_field(s, b2), get_field(s, d2)); +} +#define SPEC_in1_la2 0 + +static void in1_m1_8u(DisasContext *s, DisasOps *o) +{ + in1_la1(s, o); + o->in1 = tcg_temp_new_i64(); + tcg_gen_qemu_ld8u(o->in1, o->addr1, get_mem_index(s)); +} +#define SPEC_in1_m1_8u 0 + +static void in1_m1_16s(DisasContext *s, DisasOps *o) +{ + in1_la1(s, o); + o->in1 = tcg_temp_new_i64(); + tcg_gen_qemu_ld16s(o->in1, o->addr1, get_mem_index(s)); +} +#define SPEC_in1_m1_16s 0 + +static void in1_m1_16u(DisasContext *s, DisasOps *o) +{ + in1_la1(s, o); + o->in1 = tcg_temp_new_i64(); + tcg_gen_qemu_ld16u(o->in1, o->addr1, get_mem_index(s)); +} +#define SPEC_in1_m1_16u 0 + +static void in1_m1_32s(DisasContext *s, DisasOps *o) +{ + in1_la1(s, o); + o->in1 = tcg_temp_new_i64(); + tcg_gen_qemu_ld32s(o->in1, o->addr1, get_mem_index(s)); +} +#define SPEC_in1_m1_32s 0 + +static void in1_m1_32u(DisasContext *s, DisasOps *o) +{ + in1_la1(s, o); + o->in1 = tcg_temp_new_i64(); + tcg_gen_qemu_ld32u(o->in1, o->addr1, get_mem_index(s)); +} +#define SPEC_in1_m1_32u 0 + +static void in1_m1_64(DisasContext *s, DisasOps *o) +{ + in1_la1(s, o); + o->in1 = tcg_temp_new_i64(); + tcg_gen_qemu_ld64(o->in1, o->addr1, get_mem_index(s)); +} +#define SPEC_in1_m1_64 0 + +/* ====================================================================== */ +/* The "INput 2" generators. These load the second operand to an insn. 
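+   Generators with an "_o" suffix alias the register global directly and
+   set g_in2, so translate_one() does not free the value.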
*/ + +static void in2_r1_o(DisasContext *s, DisasOps *o) +{ + o->in2 = regs[get_field(s, r1)]; + o->g_in2 = true; +} +#define SPEC_in2_r1_o 0 + +static void in2_r1_16u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext16u_i64(o->in2, regs[get_field(s, r1)]); +} +#define SPEC_in2_r1_16u 0 + +static void in2_r1_32u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in2, regs[get_field(s, r1)]); +} +#define SPEC_in2_r1_32u 0 + +static void in2_r1_D32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + o->in2 = tcg_temp_new_i64(); + tcg_gen_concat32_i64(o->in2, regs[r1 + 1], regs[r1]); +} +#define SPEC_in2_r1_D32 SPEC_r1_even + +static void in2_r2(DisasContext *s, DisasOps *o) +{ + o->in2 = load_reg(get_field(s, r2)); +} +#define SPEC_in2_r2 0 + +static void in2_r2_o(DisasContext *s, DisasOps *o) +{ + o->in2 = regs[get_field(s, r2)]; + o->g_in2 = true; +} +#define SPEC_in2_r2_o 0 + +static void in2_r2_nz(DisasContext *s, DisasOps *o) +{ + int r2 = get_field(s, r2); + if (r2 != 0) { + o->in2 = load_reg(r2); + } +} +#define SPEC_in2_r2_nz 0 + +static void in2_r2_8s(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext8s_i64(o->in2, regs[get_field(s, r2)]); +} +#define SPEC_in2_r2_8s 0 + +static void in2_r2_8u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext8u_i64(o->in2, regs[get_field(s, r2)]); +} +#define SPEC_in2_r2_8u 0 + +static void in2_r2_16s(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext16s_i64(o->in2, regs[get_field(s, r2)]); +} +#define SPEC_in2_r2_16s 0 + +static void in2_r2_16u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext16u_i64(o->in2, regs[get_field(s, r2)]); +} +#define SPEC_in2_r2_16u 0 + +static void in2_r3(DisasContext *s, DisasOps *o) +{ + o->in2 = load_reg(get_field(s, r3)); +} +#define SPEC_in2_r3 0 + +static void in2_r3_sr32(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_shri_i64(o->in2, regs[get_field(s, r3)], 32); +} +#define SPEC_in2_r3_sr32 0 + +static void in2_r3_32u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in2, regs[get_field(s, r3)]); +} +#define SPEC_in2_r3_32u 0 + +static void in2_r2_32s(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(o->in2, regs[get_field(s, r2)]); +} +#define SPEC_in2_r2_32s 0 + +static void in2_r2_32u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in2, regs[get_field(s, r2)]); +} +#define SPEC_in2_r2_32u 0 + +static void in2_r2_sr32(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_shri_i64(o->in2, regs[get_field(s, r2)], 32); +} +#define SPEC_in2_r2_sr32 0 + +static void in2_e2(DisasContext *s, DisasOps *o) +{ + o->in2 = load_freg32_i64(get_field(s, r2)); +} +#define SPEC_in2_e2 0 + +static void in2_f2(DisasContext *s, DisasOps *o) +{ + o->in2 = load_freg(get_field(s, r2)); +} +#define SPEC_in2_f2 0 + +/* Load the low double word of an extended (128-bit) format FP number */ +static void in2_x2l(DisasContext *s, DisasOps *o) +{ + o->in2 = load_freg(get_field(s, r2) + 2); +} +#define SPEC_in2_x2l SPEC_r2_f128 + +static void in2_ra2(DisasContext *s, DisasOps *o) +{ + int r2 = get_field(s, r2); + + /* Note: *don't* treat !r2 as 0, use the reg value. 
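+       Register 0 still supplies its contents; gen_addi_and_wrap_i64()
+       with displacement 0 merely applies the addressing-mode wrap.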
*/ + o->in2 = tcg_temp_new_i64(); + gen_addi_and_wrap_i64(s, o->in2, regs[r2], 0); +} +#define SPEC_in2_ra2 0 + +static void in2_a2(DisasContext *s, DisasOps *o) +{ + int x2 = have_field(s, x2) ? get_field(s, x2) : 0; + o->in2 = get_address(s, x2, get_field(s, b2), get_field(s, d2)); +} +#define SPEC_in2_a2 0 + +static void in2_ri2(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_const_i64(s->base.pc_next + (int64_t)get_field(s, i2) * 2); +} +#define SPEC_in2_ri2 0 + +static void in2_sh32(DisasContext *s, DisasOps *o) +{ + help_l2_shift(s, o, 31); +} +#define SPEC_in2_sh32 0 + +static void in2_sh64(DisasContext *s, DisasOps *o) +{ + help_l2_shift(s, o, 63); +} +#define SPEC_in2_sh64 0 + +static void in2_m2_8u(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld8u(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_m2_8u 0 + +static void in2_m2_16s(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld16s(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_m2_16s 0 + +static void in2_m2_16u(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld16u(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_m2_16u 0 + +static void in2_m2_32s(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld32s(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_m2_32s 0 + +static void in2_m2_32u(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld32u(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_m2_32u 0 + +#ifndef CONFIG_USER_ONLY +static void in2_m2_32ua(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld_tl(o->in2, o->in2, get_mem_index(s), MO_TEUL | MO_ALIGN); +} +#define SPEC_in2_m2_32ua 0 +#endif + +static void in2_m2_64(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld64(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_m2_64 0 + +static void in2_m2_64w(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld64(o->in2, o->in2, get_mem_index(s)); + gen_addi_and_wrap_i64(s, o->in2, o->in2, 0); +} +#define SPEC_in2_m2_64w 0 + +#ifndef CONFIG_USER_ONLY +static void in2_m2_64a(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld_i64(o->in2, o->in2, get_mem_index(s), MO_TEQ | MO_ALIGN); +} +#define SPEC_in2_m2_64a 0 +#endif + +static void in2_mri2_16u(DisasContext *s, DisasOps *o) +{ + in2_ri2(s, o); + tcg_gen_qemu_ld16u(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_mri2_16u 0 + +static void in2_mri2_32s(DisasContext *s, DisasOps *o) +{ + in2_ri2(s, o); + tcg_gen_qemu_ld32s(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_mri2_32s 0 + +static void in2_mri2_32u(DisasContext *s, DisasOps *o) +{ + in2_ri2(s, o); + tcg_gen_qemu_ld32u(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_mri2_32u 0 + +static void in2_mri2_64(DisasContext *s, DisasOps *o) +{ + in2_ri2(s, o); + tcg_gen_qemu_ld64(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_mri2_64 0 + +static void in2_i2(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_const_i64(get_field(s, i2)); +} +#define SPEC_in2_i2 0 + +static void in2_i2_8u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_const_i64((uint8_t)get_field(s, i2)); +} +#define SPEC_in2_i2_8u 0 + +static void in2_i2_16u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_const_i64((uint16_t)get_field(s, i2)); +} +#define SPEC_in2_i2_16u 0 + +static void in2_i2_32u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_const_i64((uint32_t)get_field(s, i2)); +} +#define SPEC_in2_i2_32u 0 + +static void in2_i2_16u_shl(DisasContext *s, 
DisasOps *o) +{ + uint64_t i2 = (uint16_t)get_field(s, i2); + o->in2 = tcg_const_i64(i2 << s->insn->data); +} +#define SPEC_in2_i2_16u_shl 0 + +static void in2_i2_32u_shl(DisasContext *s, DisasOps *o) +{ + uint64_t i2 = (uint32_t)get_field(s, i2); + o->in2 = tcg_const_i64(i2 << s->insn->data); +} +#define SPEC_in2_i2_32u_shl 0 + +#ifndef CONFIG_USER_ONLY +static void in2_insn(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_const_i64(s->fields.raw_insn); +} +#define SPEC_in2_insn 0 +#endif + +/* ====================================================================== */ + +/* Find opc within the table of insns. This is formulated as a switch + statement so that (1) we get compile-time notice of cut-paste errors + for duplicated opcodes, and (2) the compiler generates the binary + search tree, rather than us having to post-process the table. */ + +#define C(OPC, NM, FT, FC, I1, I2, P, W, OP, CC) \ + E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, 0, 0) + +#define D(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D) \ + E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, 0) + +#define F(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, FL) \ + E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, 0, FL) + +#define E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, FL) insn_ ## NM, + +enum DisasInsnEnum { +#include "insn-data.def" +}; + +#undef E +#define E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, FL) { \ + .opc = OPC, \ + .flags = FL, \ + .fmt = FMT_##FT, \ + .fac = FAC_##FC, \ + .spec = SPEC_in1_##I1 | SPEC_in2_##I2 | SPEC_prep_##P | SPEC_wout_##W, \ + .name = #NM, \ + .help_in1 = in1_##I1, \ + .help_in2 = in2_##I2, \ + .help_prep = prep_##P, \ + .help_wout = wout_##W, \ + .help_cout = cout_##CC, \ + .help_op = op_##OP, \ + .data = D \ + }, + +/* Allow 0 to be used for NULL in the table below. */ +#define in1_0 NULL +#define in2_0 NULL +#define prep_0 NULL +#define wout_0 NULL +#define cout_0 NULL +#define op_0 NULL + +#define SPEC_in1_0 0 +#define SPEC_in2_0 0 +#define SPEC_prep_0 0 +#define SPEC_wout_0 0 + +/* Give smaller names to the various facilities. 
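+   Each FAC_* alias maps the facility column of insn-data.def to the
+   S390_FEAT_* bit recorded in DisasInsn.fac.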
 */
+#define FAC_Z S390_FEAT_ZARCH
+#define FAC_CASS S390_FEAT_COMPARE_AND_SWAP_AND_STORE
+#define FAC_DFP S390_FEAT_DFP
+#define FAC_DFPR S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* DFP-rounding */
+#define FAC_DO S390_FEAT_STFLE_45 /* distinct-operands */
+#define FAC_EE S390_FEAT_EXECUTE_EXT
+#define FAC_EI S390_FEAT_EXTENDED_IMMEDIATE
+#define FAC_FPE S390_FEAT_FLOATING_POINT_EXT
+#define FAC_FPSSH S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* FPS-sign-handling */
+#define FAC_FPRGR S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* FPR-GR-transfer */
+#define FAC_GIE S390_FEAT_GENERAL_INSTRUCTIONS_EXT
+#define FAC_HFP_MA S390_FEAT_HFP_MADDSUB
+#define FAC_HW S390_FEAT_STFLE_45 /* high-word */
+#define FAC_IEEEE_SIM S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* IEEE-exception-simulation */
+#define FAC_MIE S390_FEAT_STFLE_49 /* misc-instruction-extensions */
+#define FAC_LAT S390_FEAT_STFLE_49 /* load-and-trap */
+#define FAC_LOC S390_FEAT_STFLE_45 /* load/store on condition 1 */
+#define FAC_LOC2 S390_FEAT_STFLE_53 /* load/store on condition 2 */
+#define FAC_LD S390_FEAT_LONG_DISPLACEMENT
+#define FAC_PC S390_FEAT_STFLE_45 /* population count */
+#define FAC_SCF S390_FEAT_STORE_CLOCK_FAST
+#define FAC_SFLE S390_FEAT_STFLE
+#define FAC_ILA S390_FEAT_STFLE_45 /* interlocked-access-facility 1 */
+#define FAC_MVCOS S390_FEAT_MOVE_WITH_OPTIONAL_SPEC
+#define FAC_LPP S390_FEAT_SET_PROGRAM_PARAMETERS /* load-program-parameter */
+#define FAC_DAT_ENH S390_FEAT_DAT_ENH
+#define FAC_E2 S390_FEAT_EXTENDED_TRANSLATION_2
+#define FAC_EH S390_FEAT_STFLE_49 /* execution-hint */
+#define FAC_PPA S390_FEAT_STFLE_49 /* processor-assist */
+#define FAC_LZRB S390_FEAT_STFLE_53 /* load-and-zero-rightmost-byte */
+#define FAC_ETF3 S390_FEAT_EXTENDED_TRANSLATION_3
+#define FAC_MSA S390_FEAT_MSA /* message-security-assist facility */
+#define FAC_MSA3 S390_FEAT_MSA_EXT_3 /* msa-extension-3 facility */
+#define FAC_MSA4 S390_FEAT_MSA_EXT_4 /* msa-extension-4 facility */
+#define FAC_MSA5 S390_FEAT_MSA_EXT_5 /* msa-extension-5 facility */
+#define FAC_MSA8 S390_FEAT_MSA_EXT_8 /* msa-extension-8 facility */
+#define FAC_ECT S390_FEAT_EXTRACT_CPU_TIME
+#define FAC_PCI S390_FEAT_ZPCI /* z/PCI facility */
+#define FAC_AIS S390_FEAT_ADAPTER_INT_SUPPRESSION
+#define FAC_V S390_FEAT_VECTOR /* vector facility */
+#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 */
+#define FAC_MIE2 S390_FEAT_MISC_INSTRUCTION_EXT2 /* miscellaneous-instruction-extensions facility 2 */
+
+static const DisasInsn insn_info[] = {
+#include "insn-data.def"
+};
+
+#undef E
+#define E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, FL) \
+    case OPC: return &insn_info[insn_ ## NM];
+
+static const DisasInsn *lookup_opc(uint16_t opc)
+{
+    switch (opc) {
+#include "insn-data.def"
+    default:
+        return NULL;
+    }
+}
+
+#undef F
+#undef E
+#undef D
+#undef C
+
+/* Extract a field from the insn.  The INSN should be left-aligned in
+   the uint64_t so that we can more easily utilize the big-bit-endian
+   definitions we extract from the Principles of Operation.  */
+
+static void extract_field(DisasFields *o, const DisasField *f, uint64_t insn)
+{
+    uint32_t r, m;
+
+    if (f->size == 0) {
+        return;
+    }
+
+    /* Zero extract the field from the insn.  */
+    r = (insn << f->beg) >> (64 - f->size);
+
+    /* Sign-extend, or un-swap the field as necessary.  */
+    switch (f->type) {
+    case 0: /* unsigned */
+        break;
+    case 1: /* signed */
+        assert(f->size <= 32);
+        m = 1u << (f->size - 1);
+        r = (r ^ m) - m;
+        break;
+    case 2: /* dl+dh split, signed 20 bit.
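+               The raw field holds DL in its upper 12 bits and DH in its
+               low 8 bits; sign-extend DH and recombine as DH:DL.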
*/ + r = ((int8_t)r << 12) | (r >> 8); + break; + case 3: /* MSB stored in RXB */ + g_assert(f->size == 4); + switch (f->beg) { + case 8: + r |= extract64(insn, 63 - 36, 1) << 4; + break; + case 12: + r |= extract64(insn, 63 - 37, 1) << 4; + break; + case 16: + r |= extract64(insn, 63 - 38, 1) << 4; + break; + case 32: + r |= extract64(insn, 63 - 39, 1) << 4; + break; + default: + g_assert_not_reached(); + } + break; + default: + abort(); + } + + /* + * Validate that the "compressed" encoding we selected above is valid. + * I.e. we haven't made two different original fields overlap. + */ + assert(((o->presentC >> f->indexC) & 1) == 0); + o->presentC |= 1 << f->indexC; + o->presentO |= 1 << f->indexO; + + o->c[f->indexC] = r; +} + +/* Lookup the insn at the current PC, extracting the operands into O and + returning the info struct for the insn. Returns NULL for invalid insn. */ + +static const DisasInsn *extract_insn(CPUS390XState *env, DisasContext *s) +{ + uint64_t insn, pc = s->base.pc_next; + int op, op2, ilen; + const DisasInsn *info; + + if (unlikely(s->ex_value)) { + /* Drop the EX data now, so that it's clear on exception paths. */ + TCGv_i64 zero = tcg_const_i64(0); + tcg_gen_st_i64(zero, cpu_env, offsetof(CPUS390XState, ex_value)); + tcg_temp_free_i64(zero); + + /* Extract the values saved by EXECUTE. */ + insn = s->ex_value & 0xffffffffffff0000ull; + ilen = s->ex_value & 0xf; + op = insn >> 56; + } else { + insn = ld_code2(env, pc); + op = (insn >> 8) & 0xff; + ilen = get_ilen(op); + switch (ilen) { + case 2: + insn = insn << 48; + break; + case 4: + insn = ld_code4(env, pc) << 32; + break; + case 6: + insn = (insn << 48) | (ld_code4(env, pc + 2) << 16); + break; + default: + g_assert_not_reached(); + } + } + s->pc_tmp = s->base.pc_next + ilen; + s->ilen = ilen; + + /* We can't actually determine the insn format until we've looked up + the full insn opcode. Which we can't do without locating the + secondary opcode. Assume by default that OP2 is at bit 40; for + those smaller insns that don't actually have a secondary opcode + this will correctly result in OP2 = 0. */ + switch (op) { + case 0x01: /* E */ + case 0x80: /* S */ + case 0x82: /* S */ + case 0x93: /* S */ + case 0xb2: /* S, RRF, RRE, IE */ + case 0xb3: /* RRE, RRD, RRF */ + case 0xb9: /* RRE, RRF */ + case 0xe5: /* SSE, SIL */ + op2 = (insn << 8) >> 56; + break; + case 0xa5: /* RI */ + case 0xa7: /* RI */ + case 0xc0: /* RIL */ + case 0xc2: /* RIL */ + case 0xc4: /* RIL */ + case 0xc6: /* RIL */ + case 0xc8: /* SSF */ + case 0xcc: /* RIL */ + op2 = (insn << 12) >> 60; + break; + case 0xc5: /* MII */ + case 0xc7: /* SMI */ + case 0xd0 ... 0xdf: /* SS */ + case 0xe1: /* SS */ + case 0xe2: /* SS */ + case 0xe8: /* SS */ + case 0xe9: /* SS */ + case 0xea: /* SS */ + case 0xee ... 0xf3: /* SS */ + case 0xf8 ... 0xfd: /* SS */ + op2 = 0; + break; + default: + op2 = (insn << 40) >> 56; + break; + } + + memset(&s->fields, 0, sizeof(s->fields)); + s->fields.raw_insn = insn; + s->fields.op = op; + s->fields.op2 = op2; + + /* Lookup the instruction. */ + info = lookup_opc(op << 8 | op2); + s->insn = info; + + /* If we found it, extract the operands. 
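+       Each operand field is decoded via the format_info[] entry for the
+       insn's format into s->fields.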
*/ + if (info != NULL) { + DisasFormat fmt = info->fmt; + int i; + + for (i = 0; i < NUM_C_FIELD; ++i) { + extract_field(&s->fields, &format_info[fmt].op[i], insn); + } + } + return info; +} + +static bool is_afp_reg(int reg) +{ + return reg % 2 || reg > 6; +} + +static bool is_fp_pair(int reg) +{ + /* 0,1,4,5,8,9,12,13: to exclude the others, check for single bit */ + return !(reg & 0x2); +} + +static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) +{ + const DisasInsn *insn; + DisasJumpType ret = DISAS_NEXT; + DisasOps o = {}; + bool icount = false; + + /* Search for the insn in the table. */ + insn = extract_insn(env, s); + + /* Emit insn_start now that we know the ILEN. */ + tcg_gen_insn_start(s->base.pc_next, s->cc_op, s->ilen); + + /* Not found means unimplemented/illegal opcode. */ + if (insn == NULL) { + qemu_log_mask(LOG_UNIMP, "unimplemented opcode 0x%02x%02x\n", + s->fields.op, s->fields.op2); + gen_illegal_opcode(s); + ret = DISAS_NORETURN; + goto out; + } + +#ifndef CONFIG_USER_ONLY + if (s->base.tb->flags & FLAG_MASK_PER) { + TCGv_i64 addr = tcg_const_i64(s->base.pc_next); + gen_helper_per_ifetch(cpu_env, addr); + tcg_temp_free_i64(addr); + } +#endif + + /* process flags */ + if (insn->flags) { + /* privileged instruction */ + if ((s->base.tb->flags & FLAG_MASK_PSTATE) && (insn->flags & IF_PRIV)) { + gen_program_exception(s, PGM_PRIVILEGED); + ret = DISAS_NORETURN; + goto out; + } + + /* if AFP is not enabled, instructions and registers are forbidden */ + if (!(s->base.tb->flags & FLAG_MASK_AFP)) { + uint8_t dxc = 0; + + if ((insn->flags & IF_AFP1) && is_afp_reg(get_field(s, r1))) { + dxc = 1; + } + if ((insn->flags & IF_AFP2) && is_afp_reg(get_field(s, r2))) { + dxc = 1; + } + if ((insn->flags & IF_AFP3) && is_afp_reg(get_field(s, r3))) { + dxc = 1; + } + if (insn->flags & IF_BFP) { + dxc = 2; + } + if (insn->flags & IF_DFP) { + dxc = 3; + } + if (insn->flags & IF_VEC) { + dxc = 0xfe; + } + if (dxc) { + gen_data_exception(dxc); + ret = DISAS_NORETURN; + goto out; + } + } + + /* if vector instructions not enabled, executing them is forbidden */ + if (insn->flags & IF_VEC) { + if (!((s->base.tb->flags & FLAG_MASK_VECTOR))) { + gen_data_exception(0xfe); + ret = DISAS_NORETURN; + goto out; + } + } + + /* input/output is the special case for icount mode */ + if (unlikely(insn->flags & IF_IO)) { + icount = tb_cflags(s->base.tb) & CF_USE_ICOUNT; + if (icount) { + gen_io_start(); + } + } + } + + /* Check for insn specification exceptions. */ + if (insn->spec) { + if ((insn->spec & SPEC_r1_even && get_field(s, r1) & 1) || + (insn->spec & SPEC_r2_even && get_field(s, r2) & 1) || + (insn->spec & SPEC_r3_even && get_field(s, r3) & 1) || + (insn->spec & SPEC_r1_f128 && !is_fp_pair(get_field(s, r1))) || + (insn->spec & SPEC_r2_f128 && !is_fp_pair(get_field(s, r2)))) { + gen_program_exception(s, PGM_SPECIFICATION); + ret = DISAS_NORETURN; + goto out; + } + } + + /* Implement the instruction. */ + if (insn->help_in1) { + insn->help_in1(s, &o); + } + if (insn->help_in2) { + insn->help_in2(s, &o); + } + if (insn->help_prep) { + insn->help_prep(s, &o); + } + if (insn->help_op) { + ret = insn->help_op(s, &o); + } + if (ret != DISAS_NORETURN) { + if (insn->help_wout) { + insn->help_wout(s, &o); + } + if (insn->help_cout) { + insn->help_cout(s, &o); + } + } + + /* Free any temporaries created by the helpers. 
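+       Operands flagged g_* alias TCG globals (CPU registers) and must
+       stay live, so those are skipped.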
 */
+    if (o.out && !o.g_out) {
+        tcg_temp_free_i64(o.out);
+    }
+    if (o.out2 && !o.g_out2) {
+        tcg_temp_free_i64(o.out2);
+    }
+    if (o.in1 && !o.g_in1) {
+        tcg_temp_free_i64(o.in1);
+    }
+    if (o.in2 && !o.g_in2) {
+        tcg_temp_free_i64(o.in2);
+    }
+    if (o.addr1) {
+        tcg_temp_free_i64(o.addr1);
+    }
+
+    /* io should be the last instruction in tb when icount is enabled */
+    if (unlikely(icount && ret == DISAS_NEXT)) {
+        ret = DISAS_PC_STALE;
+    }
+
+#ifndef CONFIG_USER_ONLY
+    if (s->base.tb->flags & FLAG_MASK_PER) {
+        /* An exception might be triggered, save PSW if not already done.  */
+        if (ret == DISAS_NEXT || ret == DISAS_PC_STALE) {
+            tcg_gen_movi_i64(psw_addr, s->pc_tmp);
+        }
+
+        /* Call the helper to check for a possible PER exception.  */
+        gen_helper_per_check_exception(cpu_env);
+    }
+#endif
+
+out:
+    /* Advance to the next instruction.  */
+    s->base.pc_next = s->pc_tmp;
+    return ret;
+}
+
+static void s390x_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
+{
+    DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+    /* 31-bit mode */
+    if (!(dc->base.tb->flags & FLAG_MASK_64)) {
+        dc->base.pc_first &= 0x7fffffff;
+        dc->base.pc_next = dc->base.pc_first;
+    }
+
+    dc->cc_op = CC_OP_DYNAMIC;
+    dc->ex_value = dc->base.tb->cs_base;
+    dc->do_debug = dc->base.singlestep_enabled;
+}
+
+static void s390x_tr_tb_start(DisasContextBase *db, CPUState *cs)
+{
+}
+
+static void s390x_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
+{
+}
+
+static bool s390x_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cs,
+                                      const CPUBreakpoint *bp)
+{
+    DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+    /*
+     * Emit an insn_start to accompany the breakpoint exception.
+     * The ILEN value is a dummy, since this does not result in
+     * an s390x exception, but an internal qemu exception which
+     * brings us back to interact with the gdbstub.
+     */
+    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op, 2);
+
+    dc->base.is_jmp = DISAS_PC_STALE;
+    dc->do_debug = true;
+    /* The address covered by the breakpoint must be included in
+       [tb->pc, tb->pc + tb->size) in order for it to be
+       properly cleared -- thus we increment the PC here so that
+       the logic setting tb->size does the right thing. */
+    dc->base.pc_next += 2;
+    return true;
+}
+
+static void s390x_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
+{
+    CPUS390XState *env = cs->env_ptr;
+    DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+    dc->base.is_jmp = translate_one(env, dc);
+    if (dc->base.is_jmp == DISAS_NEXT) {
+        uint64_t page_start;
+
+        page_start = dc->base.pc_first & TARGET_PAGE_MASK;
+        if (dc->base.pc_next - page_start >= TARGET_PAGE_SIZE || dc->ex_value) {
+            dc->base.is_jmp = DISAS_TOO_MANY;
+        }
+    }
+}
+
+static void s390x_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
+{
+    DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+    switch (dc->base.is_jmp) {
+    case DISAS_GOTO_TB:
+    case DISAS_NORETURN:
+        break;
+    case DISAS_TOO_MANY:
+    case DISAS_PC_STALE:
+    case DISAS_PC_STALE_NOCHAIN:
+        update_psw_addr(dc);
+        /* FALLTHRU */
+    case DISAS_PC_UPDATED:
+        /* Next TB starts off with CC_OP_DYNAMIC, so make sure the
+           cc op type is in env */
+        update_cc_op(dc);
+        /* FALLTHRU */
+    case DISAS_PC_CC_UPDATED:
+        /* Exit the TB, either by raising a debug exception or by return.
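+           With PER enabled or DISAS_PC_STALE_NOCHAIN we must use
+           exit_tb(0) so the TB is not chained and pending interrupts
+           are re-examined before the next TB.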
 */
+        if (dc->do_debug) {
+            gen_exception(EXCP_DEBUG);
+        } else if ((dc->base.tb->flags & FLAG_MASK_PER) ||
+                   dc->base.is_jmp == DISAS_PC_STALE_NOCHAIN) {
+            tcg_gen_exit_tb(NULL, 0);
+        } else {
+            tcg_gen_lookup_and_goto_ptr();
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void s390x_tr_disas_log(const DisasContextBase *dcbase, CPUState *cs)
+{
+    DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+    if (unlikely(dc->ex_value)) {
+        /* ??? Unfortunately log_target_disas can't use host memory.  */
+        qemu_log("IN: EXECUTE %016" PRIx64 "\n", dc->ex_value);
+    } else {
+        qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
+        log_target_disas(cs, dc->base.pc_first, dc->base.tb->size);
+    }
+}
+
+static const TranslatorOps s390x_tr_ops = {
+    .init_disas_context = s390x_tr_init_disas_context,
+    .tb_start = s390x_tr_tb_start,
+    .insn_start = s390x_tr_insn_start,
+    .breakpoint_check = s390x_tr_breakpoint_check,
+    .translate_insn = s390x_tr_translate_insn,
+    .tb_stop = s390x_tr_tb_stop,
+    .disas_log = s390x_tr_disas_log,
+};
+
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+{
+    DisasContext dc;
+
+    translator_loop(&s390x_tr_ops, &dc.base, cs, tb, max_insns);
+}
+
+void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb,
+                          target_ulong *data)
+{
+    int cc_op = data[1];
+
+    env->psw.addr = data[0];
+
+    /* Update the CC opcode if it is not already up-to-date.  */
+    if ((cc_op != CC_OP_DYNAMIC) && (cc_op != CC_OP_STATIC)) {
+        env->cc_op = cc_op;
+    }
+
+    /* Record ILEN.  */
+    env->int_pgm_ilen = data[2];
+}
diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc
new file mode 100644
index 0000000000..0afa46e463
--- /dev/null
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -0,0 +1,3109 @@
+/*
+ * QEMU TCG support -- s390x vector instruction translation functions
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ *   David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * For most instructions that use the same element size for reads and
+ * writes, we can use real gvec vector expansion, which potentially uses
+ * real host vector instructions. As they only work up to 64 bit elements,
+ * 128 bit elements (the whole vector is a single element) have to be
+ * handled differently. Operations that are too complicated to encode via
+ * TCG ops are handled via gvec ool (out-of-line) handlers.
+ *
+ * As soon as instructions use different element sizes for reads and writes
+ * or access elements "out of their element scope" we expand them manually
+ * in fancy loops, as gvec expansion does not deal with actual element
+ * numbers and also does not support access to other elements.
+ *
+ * 128 bit elements:
+ *  As we only have i32/i64, such elements have to be loaded into two
+ *  i64 values and can then be processed e.g. by tcg_gen_add2_i64.
+ *
+ * Sizes:
+ *  On s390x, the operand size (oprsz) and the maximum size (maxsz) are
+ *  always 16 (128 bit). What gvec code calls "vece", s390x calls "es",
+ *  a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only
+ *  the 128 bit element size has to be treated in a special way (MO_64 + 1).
+ *  We will use ES_* instead of MO_* for this reason in this file.
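+ *  E.g. a vector holds 16 ES_8 elements, two ES_64 elements, or a
+ *  single ES_128 element.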
+ *
+ * CC handling:
+ *  As gvec ool-helpers can currently not return values (besides via
+ *  pointers like vectors or cpu_env), whenever we have to set the CC and
+ *  can't conclude the value from the result vector, we will directly
+ *  set it in "env->cc_op" and mark it as static via set_cc_static().
+ *  Whenever this is done, the helper writes globals (cc_op).
+ */
+
+#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es))
+#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es))
+#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE)
+
+#define ES_8 MO_8
+#define ES_16 MO_16
+#define ES_32 MO_32
+#define ES_64 MO_64
+#define ES_128 4
+
+/* Floating-Point Format */
+#define FPF_SHORT 2
+#define FPF_LONG 3
+#define FPF_EXT 4
+
+static inline bool valid_vec_element(uint8_t enr, MemOp es)
+{
+    return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
+}
+
+static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
+                                 MemOp memop)
+{
+    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
+
+    switch (memop) {
+    case ES_8:
+        tcg_gen_ld8u_i64(dst, cpu_env, offs);
+        break;
+    case ES_16:
+        tcg_gen_ld16u_i64(dst, cpu_env, offs);
+        break;
+    case ES_32:
+        tcg_gen_ld32u_i64(dst, cpu_env, offs);
+        break;
+    case ES_8 | MO_SIGN:
+        tcg_gen_ld8s_i64(dst, cpu_env, offs);
+        break;
+    case ES_16 | MO_SIGN:
+        tcg_gen_ld16s_i64(dst, cpu_env, offs);
+        break;
+    case ES_32 | MO_SIGN:
+        tcg_gen_ld32s_i64(dst, cpu_env, offs);
+        break;
+    case ES_64:
+    case ES_64 | MO_SIGN:
+        tcg_gen_ld_i64(dst, cpu_env, offs);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void read_vec_element_i32(TCGv_i32 dst, uint8_t reg, uint8_t enr,
+                                 MemOp memop)
+{
+    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
+
+    switch (memop) {
+    case ES_8:
+        tcg_gen_ld8u_i32(dst, cpu_env, offs);
+        break;
+    case ES_16:
+        tcg_gen_ld16u_i32(dst, cpu_env, offs);
+        break;
+    case ES_8 | MO_SIGN:
+        tcg_gen_ld8s_i32(dst, cpu_env, offs);
+        break;
+    case ES_16 | MO_SIGN:
+        tcg_gen_ld16s_i32(dst, cpu_env, offs);
+        break;
+    case ES_32:
+    case ES_32 | MO_SIGN:
+        tcg_gen_ld_i32(dst, cpu_env, offs);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
+                                  MemOp memop)
+{
+    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
+
+    switch (memop) {
+    case ES_8:
+        tcg_gen_st8_i64(src, cpu_env, offs);
+        break;
+    case ES_16:
+        tcg_gen_st16_i64(src, cpu_env, offs);
+        break;
+    case ES_32:
+        tcg_gen_st32_i64(src, cpu_env, offs);
+        break;
+    case ES_64:
+        tcg_gen_st_i64(src, cpu_env, offs);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void write_vec_element_i32(TCGv_i32 src, int reg, uint8_t enr,
+                                  MemOp memop)
+{
+    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
+
+    switch (memop) {
+    case ES_8:
+        tcg_gen_st8_i32(src, cpu_env, offs);
+        break;
+    case ES_16:
+        tcg_gen_st16_i32(src, cpu_env, offs);
+        break;
+    case ES_32:
+        tcg_gen_st_i32(src, cpu_env, offs);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
+                                    uint8_t es)
+{
+    TCGv_i64 tmp = tcg_temp_new_i64();
+
+    /* mask off invalid parts from the element nr */
+    tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1);
+
+    /* convert it to an element offset relative to cpu_env (vec_reg_offset()) */
+    tcg_gen_shli_i64(tmp, tmp, es);
+#ifndef HOST_WORDS_BIGENDIAN
+    tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
+#endif
+    tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));
+
+    /*
generate the final ptr by adding cpu_env */ + tcg_gen_trunc_i64_ptr(ptr, tmp); + tcg_gen_add_ptr(ptr, ptr, cpu_env); + + tcg_temp_free_i64(tmp); +} + +#define gen_gvec_2(v1, v2, gen) \ + tcg_gen_gvec_2(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + 16, 16, gen) +#define gen_gvec_2s(v1, v2, c, gen) \ + tcg_gen_gvec_2s(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + 16, 16, c, gen) +#define gen_gvec_2_ool(v1, v2, data, fn) \ + tcg_gen_gvec_2_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + 16, 16, data, fn) +#define gen_gvec_2i_ool(v1, v2, c, data, fn) \ + tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + c, 16, 16, data, fn) +#define gen_gvec_2_ptr(v1, v2, ptr, data, fn) \ + tcg_gen_gvec_2_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + ptr, 16, 16, data, fn) +#define gen_gvec_3(v1, v2, v3, gen) \ + tcg_gen_gvec_3(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), 16, 16, gen) +#define gen_gvec_3_ool(v1, v2, v3, data, fn) \ + tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), 16, 16, data, fn) +#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \ + tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), ptr, 16, 16, data, fn) +#define gen_gvec_3i(v1, v2, v3, c, gen) \ + tcg_gen_gvec_3i(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), 16, 16, c, gen) +#define gen_gvec_4(v1, v2, v3, v4, gen) \ + tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), \ + 16, 16, gen) +#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn) \ + tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), \ + 16, 16, data, fn) +#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \ + tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), \ + ptr, 16, 16, data, fn) +#define gen_gvec_dup_i64(es, v1, c) \ + tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c) +#define gen_gvec_mov(v1, v2) \ + tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \ + 16) +#define gen_gvec_dup_imm(es, v1, c) \ + tcg_gen_gvec_dup_imm(es, vec_full_reg_offset(v1), 16, 16, c); +#define gen_gvec_fn_2(fn, es, v1, v2) \ + tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + 16, 16) +#define gen_gvec_fn_2i(fn, es, v1, v2, c) \ + tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + c, 16, 16) +#define gen_gvec_fn_2s(fn, es, v1, v2, s) \ + tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + s, 16, 16) +#define gen_gvec_fn_3(fn, es, v1, v2, v3) \ + tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), 16, 16) +#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \ + tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16) + +/* + * Helper to carry out a 128 bit vector computation using 2 i64 values per + * vector. 
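+ * The callback receives both i64 halves of each input and produces both
+ * halves of the result; e.g. passing tcg_gen_add2_i64 as fn (as op_va
+ * below does for ES_128) yields a full 128 bit addition.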
+ */ +typedef void (*gen_gvec128_3_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); +static void gen_gvec128_3_i64(gen_gvec128_3_i64_fn fn, uint8_t d, uint8_t a, + uint8_t b) +{ + TCGv_i64 dh = tcg_temp_new_i64(); + TCGv_i64 dl = tcg_temp_new_i64(); + TCGv_i64 ah = tcg_temp_new_i64(); + TCGv_i64 al = tcg_temp_new_i64(); + TCGv_i64 bh = tcg_temp_new_i64(); + TCGv_i64 bl = tcg_temp_new_i64(); + + read_vec_element_i64(ah, a, 0, ES_64); + read_vec_element_i64(al, a, 1, ES_64); + read_vec_element_i64(bh, b, 0, ES_64); + read_vec_element_i64(bl, b, 1, ES_64); + fn(dl, dh, al, ah, bl, bh); + write_vec_element_i64(dh, d, 0, ES_64); + write_vec_element_i64(dl, d, 1, ES_64); + + tcg_temp_free_i64(dh); + tcg_temp_free_i64(dl); + tcg_temp_free_i64(ah); + tcg_temp_free_i64(al); + tcg_temp_free_i64(bh); + tcg_temp_free_i64(bl); +} + +typedef void (*gen_gvec128_4_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh, + TCGv_i64 cl, TCGv_i64 ch); +static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a, + uint8_t b, uint8_t c) +{ + TCGv_i64 dh = tcg_temp_new_i64(); + TCGv_i64 dl = tcg_temp_new_i64(); + TCGv_i64 ah = tcg_temp_new_i64(); + TCGv_i64 al = tcg_temp_new_i64(); + TCGv_i64 bh = tcg_temp_new_i64(); + TCGv_i64 bl = tcg_temp_new_i64(); + TCGv_i64 ch = tcg_temp_new_i64(); + TCGv_i64 cl = tcg_temp_new_i64(); + + read_vec_element_i64(ah, a, 0, ES_64); + read_vec_element_i64(al, a, 1, ES_64); + read_vec_element_i64(bh, b, 0, ES_64); + read_vec_element_i64(bl, b, 1, ES_64); + read_vec_element_i64(ch, c, 0, ES_64); + read_vec_element_i64(cl, c, 1, ES_64); + fn(dl, dh, al, ah, bl, bh, cl, ch); + write_vec_element_i64(dh, d, 0, ES_64); + write_vec_element_i64(dl, d, 1, ES_64); + + tcg_temp_free_i64(dh); + tcg_temp_free_i64(dl); + tcg_temp_free_i64(ah); + tcg_temp_free_i64(al); + tcg_temp_free_i64(bh); + tcg_temp_free_i64(bl); + tcg_temp_free_i64(ch); + tcg_temp_free_i64(cl); +} + +static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, + uint64_t b) +{ + TCGv_i64 bl = tcg_const_i64(b); + TCGv_i64 bh = tcg_const_i64(0); + + tcg_gen_add2_i64(dl, dh, al, ah, bl, bh); + tcg_temp_free_i64(bl); + tcg_temp_free_i64(bh); +} + +static DisasJumpType op_vbperm(DisasContext *s, DisasOps *o) +{ + gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), get_field(s, v3), 0, + gen_helper_gvec_vbperm); + + return DISAS_NEXT; +} + +static DisasJumpType op_vge(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s, v2), enr, es); + tcg_gen_add_i64(o->addr1, o->addr1, tmp); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0); + + tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + write_vec_element_i64(tmp, get_field(s, v1), enr, es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static uint64_t generate_byte_mask(uint8_t mask) +{ + uint64_t r = 0; + int i; + + for (i = 0; i < 8; i++) { + if ((mask >> i) & 1) { + r |= 0xffull << (i * 8); + } + } + return r; +} + +static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o) +{ + const uint16_t i2 = get_field(s, i2); + + if (i2 == (i2 & 0xff) * 0x0101) { + /* + * Masks for both 64 bit elements of the vector are the same. + * Trust tcg to produce a good constant loading. 
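+ * For example, i2 = 0x8181 satisfies this check (0x81 * 0x0101 == 0x8181)
+ * and both doublewords become generate_byte_mask(0x81), i.e. the
+ * constant 0xff000000000000ff.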
+ */ + gen_gvec_dup_imm(ES_64, get_field(s, v1), + generate_byte_mask(i2 & 0xff)); + } else { + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8)); + write_vec_element_i64(t, get_field(s, v1), 0, ES_64); + tcg_gen_movi_i64(t, generate_byte_mask(i2)); + write_vec_element_i64(t, get_field(s, v1), 1, ES_64); + tcg_temp_free_i64(t); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vgm(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t bits = NUM_VEC_ELEMENT_BITS(es); + const uint8_t i2 = get_field(s, i2) & (bits - 1); + const uint8_t i3 = get_field(s, i3) & (bits - 1); + uint64_t mask = 0; + int i; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* generate the mask - take care of wrapping */ + for (i = i2; ; i = (i + 1) % bits) { + mask |= 1ull << (bits - i - 1); + if (i == i3) { + break; + } + } + + gen_gvec_dup_imm(es, get_field(s, v1), mask); + return DISAS_NEXT; +} + +static DisasJumpType op_vl(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ); + write_vec_element_i64(t0, get_field(s, v1), 0, ES_64); + write_vec_element_i64(t1, get_field(s, v1), 1, ES_64); + tcg_temp_free(t0); + tcg_temp_free(t1); + return DISAS_NEXT; +} + +static DisasJumpType op_vlr(DisasContext *s, DisasOps *o) +{ + gen_gvec_mov(get_field(s, v1), get_field(s, v2)); + return DISAS_NEXT; +} + +static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + TCGv_i64 tmp; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + gen_gvec_dup_i64(es, get_field(s, v1), tmp); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vle(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + write_vec_element_i64(tmp, get_field(s, v1), enr, es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vlei(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_const_i64((int16_t)get_field(s, i2)); + write_vec_element_i64(tmp, get_field(s, v1), enr, es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + TCGv_ptr ptr; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* fast path if we don't need the register content */ + if (!get_field(s, b2)) { + uint8_t enr = get_field(s, d2) & (NUM_VEC_ELEMENTS(es) - 1); + + read_vec_element_i64(o->out, get_field(s, v3), enr, es); + return DISAS_NEXT; + } + + ptr = tcg_temp_new_ptr(); + get_vec_element_ptr_i64(ptr, get_field(s, v3), o->addr1, es); + switch (es) { + case ES_8: + 
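/* the selected element is zero-extended into the 64 bit destination */ +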
tcg_gen_ld8u_i64(o->out, ptr, 0); + break; + case ES_16: + tcg_gen_ld16u_i64(o->out, ptr, 0); + break; + case ES_32: + tcg_gen_ld32u_i64(o->out, ptr, 0); + break; + case ES_64: + tcg_gen_ld_i64(o->out, ptr, 0); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_ptr(ptr); + + return DISAS_NEXT; +} + +static DisasJumpType op_vllez(DisasContext *s, DisasOps *o) +{ + uint8_t es = get_field(s, m3); + uint8_t enr; + TCGv_i64 t; + + switch (es) { + /* rightmost sub-element of leftmost doubleword */ + case ES_8: + enr = 7; + break; + case ES_16: + enr = 3; + break; + case ES_32: + enr = 1; + break; + case ES_64: + enr = 0; + break; + /* leftmost sub-element of leftmost doubleword */ + case 6: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + es = ES_32; + enr = 0; + break; + } + /* fallthrough */ + default: + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es); + gen_gvec_dup_imm(es, get_field(s, v1), 0); + write_vec_element_i64(t, get_field(s, v1), enr, es); + tcg_temp_free_i64(t); + return DISAS_NEXT; +} + +static DisasJumpType op_vlm(DisasContext *s, DisasOps *o) +{ + const uint8_t v3 = get_field(s, v3); + uint8_t v1 = get_field(s, v1); + TCGv_i64 t0, t1; + + if (v3 < v1 || (v3 - v1 + 1) > 16) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* + * Check for possible access exceptions by trying to load the last + * element. The loop below then loads and checks the remaining elements + * in order, starting with the first. + */ + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8); + tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEQ); + + for (;; v1++) { + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ); + write_vec_element_i64(t1, v1, 0, ES_64); + if (v1 == v3) { + break; + } + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ); + write_vec_element_i64(t1, v1, 1, ES_64); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + } + + /* Store the last element, loaded first */ + write_vec_element_i64(t0, v1, 1, ES_64); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + return DISAS_NEXT; +} + +static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o) +{ + const int64_t block_size = (1ull << (get_field(s, m3) + 6)); + const int v1_offs = vec_full_reg_offset(get_field(s, v1)); + TCGv_ptr a0; + TCGv_i64 bytes; + + if (get_field(s, m3) > 6) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + bytes = tcg_temp_new_i64(); + a0 = tcg_temp_new_ptr(); + /* calculate the number of bytes until the next block boundary */ + tcg_gen_ori_i64(bytes, o->addr1, -block_size); + tcg_gen_neg_i64(bytes, bytes); + + tcg_gen_addi_ptr(a0, cpu_env, v1_offs); + gen_helper_vll(cpu_env, a0, o->addr1, bytes); + tcg_temp_free_i64(bytes); + tcg_temp_free_ptr(a0); + return DISAS_NEXT; +} + +static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + TCGv_ptr ptr; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* fast path if we don't need the register content */ + if (!get_field(s, b2)) { + uint8_t enr = get_field(s, d2) & (NUM_VEC_ELEMENTS(es) - 1); + + write_vec_element_i64(o->in2, get_field(s, v1), enr, es); + return DISAS_NEXT; + } + + ptr = tcg_temp_new_ptr(); + get_vec_element_ptr_i64(ptr, get_field(s, v1), o->addr1, es); + switch (es) { + case ES_8: + 
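/* the store through ptr modifies only the selected element */ +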
tcg_gen_st8_i64(o->in2, ptr, 0); + break; + case ES_16: + tcg_gen_st16_i64(o->in2, ptr, 0); + break; + case ES_32: + tcg_gen_st32_i64(o->in2, ptr, 0); + break; + case ES_64: + tcg_gen_st_i64(o->in2, ptr, 0); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_ptr(ptr); + + return DISAS_NEXT; +} + +static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o) +{ + write_vec_element_i64(o->in1, get_field(s, v1), 0, ES_64); + write_vec_element_i64(o->in2, get_field(s, v1), 1, ES_64); + return DISAS_NEXT; +} + +static DisasJumpType op_vll(DisasContext *s, DisasOps *o) +{ + const int v1_offs = vec_full_reg_offset(get_field(s, v1)); + TCGv_ptr a0 = tcg_temp_new_ptr(); + + /* convert highest index into an actual length */ + tcg_gen_addi_i64(o->in2, o->in2, 1); + tcg_gen_addi_ptr(a0, cpu_env, v1_offs); + gen_helper_vll(cpu_env, a0, o->addr1, o->in2); + tcg_temp_free_ptr(a0); + return DISAS_NEXT; +} + +static DisasJumpType op_vmr(DisasContext *s, DisasOps *o) +{ + const uint8_t v1 = get_field(s, v1); + const uint8_t v2 = get_field(s, v2); + const uint8_t v3 = get_field(s, v3); + const uint8_t es = get_field(s, m4); + int dst_idx, src_idx; + TCGv_i64 tmp; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + if (s->fields.op2 == 0x61) { + /* iterate backwards to avoid overwriting data we might need later */ + for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) { + src_idx = dst_idx / 2; + if (dst_idx % 2 == 0) { + read_vec_element_i64(tmp, v2, src_idx, es); + } else { + read_vec_element_i64(tmp, v3, src_idx, es); + } + write_vec_element_i64(tmp, v1, dst_idx, es); + } + } else { + /* iterate forward to avoid overwriting data we might need later */ + for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) { + src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2; + if (dst_idx % 2 == 0) { + read_vec_element_i64(tmp, v2, src_idx, es); + } else { + read_vec_element_i64(tmp, v3, src_idx, es); + } + write_vec_element_i64(tmp, v1, dst_idx, es); + } + } + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vpk(DisasContext *s, DisasOps *o) +{ + const uint8_t v1 = get_field(s, v1); + const uint8_t v2 = get_field(s, v2); + const uint8_t v3 = get_field(s, v3); + const uint8_t es = get_field(s, m4); + static gen_helper_gvec_3 * const vpk[3] = { + gen_helper_gvec_vpk16, + gen_helper_gvec_vpk32, + gen_helper_gvec_vpk64, + }; + static gen_helper_gvec_3 * const vpks[3] = { + gen_helper_gvec_vpks16, + gen_helper_gvec_vpks32, + gen_helper_gvec_vpks64, + }; + static gen_helper_gvec_3_ptr * const vpks_cc[3] = { + gen_helper_gvec_vpks_cc16, + gen_helper_gvec_vpks_cc32, + gen_helper_gvec_vpks_cc64, + }; + static gen_helper_gvec_3 * const vpkls[3] = { + gen_helper_gvec_vpkls16, + gen_helper_gvec_vpkls32, + gen_helper_gvec_vpkls64, + }; + static gen_helper_gvec_3_ptr * const vpkls_cc[3] = { + gen_helper_gvec_vpkls_cc16, + gen_helper_gvec_vpkls_cc32, + gen_helper_gvec_vpkls_cc64, + }; + + if (es == ES_8 || es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (s->fields.op2) { + case 0x97: + if (get_field(s, m5) & 0x1) { + gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]); + set_cc_static(s); + } else { + gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]); + } + break; + case 0x95: + if (get_field(s, m5) & 0x1) { + gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]); + set_cc_static(s); + } else { + gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]); + } + 
break; + case 0x94: + /* If sources and destination don't overlap -> fast path */ + if (v1 != v2 && v1 != v3) { + const uint8_t src_es = get_field(s, m4); + const uint8_t dst_es = src_es - 1; + TCGv_i64 tmp = tcg_temp_new_i64(); + int dst_idx, src_idx; + + for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) { + src_idx = dst_idx; + if (src_idx < NUM_VEC_ELEMENTS(src_es)) { + read_vec_element_i64(tmp, v2, src_idx, src_es); + } else { + src_idx -= NUM_VEC_ELEMENTS(src_es); + read_vec_element_i64(tmp, v3, src_idx, src_es); + } + write_vec_element_i64(tmp, v1, dst_idx, dst_es); + } + tcg_temp_free_i64(tmp); + } else { + gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]); + } + break; + default: + g_assert_not_reached(); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vperm(DisasContext *s, DisasOps *o) +{ + gen_gvec_4_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), + 0, gen_helper_gvec_vperm); + return DISAS_NEXT; +} + +static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o) +{ + const uint8_t i2 = extract32(get_field(s, m4), 2, 1); + const uint8_t i3 = extract32(get_field(s, m4), 0, 1); + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + read_vec_element_i64(t0, get_field(s, v2), i2, ES_64); + read_vec_element_i64(t1, get_field(s, v3), i3, ES_64); + write_vec_element_i64(t0, get_field(s, v1), 0, ES_64); + write_vec_element_i64(t1, get_field(s, v1), 1, ES_64); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + return DISAS_NEXT; +} + +static DisasJumpType op_vrep(DisasContext *s, DisasOps *o) +{ + const uint8_t enr = get_field(s, i2); + const uint8_t es = get_field(s, m4); + + if (es > ES_64 || !valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s, v1)), + vec_reg_offset(get_field(s, v3), enr, es), + 16, 16); + return DISAS_NEXT; +} + +static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o) +{ + const int64_t data = (int16_t)get_field(s, i2); + const uint8_t es = get_field(s, m3); + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_dup_imm(es, get_field(s, v1), data); + return DISAS_NEXT; +} + +static DisasJumpType op_vsce(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s, v2), enr, es); + tcg_gen_add_i64(o->addr1, o->addr1, tmp); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0); + + read_vec_element_i64(tmp, get_field(s, v1), enr, es); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vsel(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_4(bitsel, ES_8, get_field(s, v1), + get_field(s, v4), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vseg(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + int idx1, idx2; + TCGv_i64 tmp; + + switch (es) { + case ES_8: + idx1 = 7; + idx2 = 15; + break; + case ES_16: + idx1 = 3; + idx2 = 7; + break; + case ES_32: + idx1 = 1; + idx2 = 3; + break; + default: + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s, 
v2), idx1, es | MO_SIGN); + write_vec_element_i64(tmp, get_field(s, v1), 0, ES_64); + read_vec_element_i64(tmp, get_field(s, v2), idx2, es | MO_SIGN); + write_vec_element_i64(tmp, get_field(s, v1), 1, ES_64); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vst(DisasContext *s, DisasOps *o) +{ + TCGv_i64 tmp = tcg_const_i64(16); + + /* Probe write access before actually modifying memory */ + gen_helper_probe_write_access(cpu_env, o->addr1, tmp); + + read_vec_element_i64(tmp, get_field(s, v1), 0, ES_64); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + read_vec_element_i64(tmp, get_field(s, v1), 1, ES_64); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vste(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s, v1), enr, es); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vstm(DisasContext *s, DisasOps *o) +{ + const uint8_t v3 = get_field(s, v3); + uint8_t v1 = get_field(s, v1); + TCGv_i64 tmp; + + if (v3 < v1 || (v3 - v1 + 1) > 16) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* Probe write access before actually modifying memory */ + tmp = tcg_const_i64((v3 - v1 + 1) * 16); + gen_helper_probe_write_access(cpu_env, o->addr1, tmp); + + for (;; v1++) { + read_vec_element_i64(tmp, v1, 0, ES_64); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + read_vec_element_i64(tmp, v1, 1, ES_64); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ); + if (v1 == v3) { + break; + } + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + } + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vstl(DisasContext *s, DisasOps *o) +{ + const int v1_offs = vec_full_reg_offset(get_field(s, v1)); + TCGv_ptr a0 = tcg_temp_new_ptr(); + + /* convert highest index into an actual length */ + tcg_gen_addi_i64(o->in2, o->in2, 1); + tcg_gen_addi_ptr(a0, cpu_env, v1_offs); + gen_helper_vstl(cpu_env, a0, o->addr1, o->in2); + tcg_temp_free_ptr(a0); + return DISAS_NEXT; +} + +static DisasJumpType op_vup(DisasContext *s, DisasOps *o) +{ + const bool logical = s->fields.op2 == 0xd4 || s->fields.op2 == 0xd5; + const uint8_t v1 = get_field(s, v1); + const uint8_t v2 = get_field(s, v2); + const uint8_t src_es = get_field(s, m3); + const uint8_t dst_es = src_es + 1; + int dst_idx, src_idx; + TCGv_i64 tmp; + + if (src_es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + if (s->fields.op2 == 0xd7 || s->fields.op2 == 0xd5) { + /* iterate backwards to avoid overwriting data we might need later */ + for (dst_idx = NUM_VEC_ELEMENTS(dst_es) - 1; dst_idx >= 0; dst_idx--) { + src_idx = dst_idx; + read_vec_element_i64(tmp, v2, src_idx, + src_es | (logical ? 
0 : MO_SIGN)); + write_vec_element_i64(tmp, v1, dst_idx, dst_es); + } + + } else { + /* iterate forward to avoid overwriting data we might need later */ + for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) { + src_idx = dst_idx + NUM_VEC_ELEMENTS(src_es) / 2; + read_vec_element_i64(tmp, v2, src_idx, + src_es | (logical ? 0 : MO_SIGN)); + write_vec_element_i64(tmp, v1, dst_idx, dst_es); + } + } + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_va(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + + if (es > ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } else if (es == ES_128) { + gen_gvec128_3_i64(tcg_gen_add2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; + } + gen_gvec_fn_3(add, es, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static void gen_acc(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, uint8_t es) +{ + const uint8_t msb_bit_nr = NUM_VEC_ELEMENT_BITS(es) - 1; + TCGv_i64 msb_mask = tcg_const_i64(dup_const(es, 1ull << msb_bit_nr)); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + TCGv_i64 t3 = tcg_temp_new_i64(); + + /* Calculate the carry into the MSB, ignoring the old MSBs */ + tcg_gen_andc_i64(t1, a, msb_mask); + tcg_gen_andc_i64(t2, b, msb_mask); + tcg_gen_add_i64(t1, t1, t2); + /* Calculate the MSB without any carry into it */ + tcg_gen_xor_i64(t3, a, b); + /* Calculate the carry out of the MSB in the MSB bit position */ + tcg_gen_and_i64(d, a, b); + tcg_gen_and_i64(t1, t1, t3); + tcg_gen_or_i64(d, d, t1); + /* Isolate and shift the carry into position */ + tcg_gen_and_i64(d, d, msb_mask); + tcg_gen_shri_i64(d, d, msb_bit_nr); + + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + tcg_temp_free_i64(t3); +} + +static void gen_acc8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + gen_acc(d, a, b, ES_8); +} + +static void gen_acc16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + gen_acc(d, a, b, ES_16); +} + +static void gen_acc_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_add_i32(t, a, b); + tcg_gen_setcond_i32(TCG_COND_LTU, d, t, b); + tcg_temp_free_i32(t); +} + +static void gen_acc_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_add_i64(t, a, b); + tcg_gen_setcond_i64(TCG_COND_LTU, d, t, b); + tcg_temp_free_i64(t); +} + +static void gen_acc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) +{ + TCGv_i64 th = tcg_temp_new_i64(); + TCGv_i64 tl = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_const_i64(0); + + tcg_gen_add2_i64(tl, th, al, zero, bl, zero); + tcg_gen_add2_i64(tl, th, th, zero, ah, zero); + tcg_gen_add2_i64(tl, dl, tl, th, bh, zero); + tcg_gen_mov_i64(dh, zero); + + tcg_temp_free_i64(th); + tcg_temp_free_i64(tl); + tcg_temp_free_i64(zero); +} + +static DisasJumpType op_vacc(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + static const GVecGen3 g[4] = { + { .fni8 = gen_acc8_i64, }, + { .fni8 = gen_acc16_i64, }, + { .fni4 = gen_acc_i32, }, + { .fni8 = gen_acc_i64, }, + }; + + if (es > ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } else if (es == ES_128) { + gen_gvec128_3_i64(gen_acc2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; + } + gen_gvec_3(get_field(s, v1), get_field(s, v2), + get_field(s, v3), &g[es]); + return DISAS_NEXT; +} + +static void gen_ac2_i64(TCGv_i64 dl, 
TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, + TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch) +{ + TCGv_i64 tl = tcg_temp_new_i64(); + TCGv_i64 th = tcg_const_i64(0); + + /* extract the carry only */ + tcg_gen_extract_i64(tl, cl, 0, 1); + tcg_gen_add2_i64(dl, dh, al, ah, bl, bh); + tcg_gen_add2_i64(dl, dh, dl, dh, tl, th); + + tcg_temp_free_i64(tl); + tcg_temp_free_i64(th); +} + +static DisasJumpType op_vac(DisasContext *s, DisasOps *o) +{ + if (get_field(s, m5) != ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec128_4_i64(gen_ac2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3), + get_field(s, v4)); + return DISAS_NEXT; +} + +static void gen_accc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, + TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch) +{ + TCGv_i64 tl = tcg_temp_new_i64(); + TCGv_i64 th = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_const_i64(0); + + tcg_gen_andi_i64(tl, cl, 1); + tcg_gen_add2_i64(tl, th, tl, zero, al, zero); + tcg_gen_add2_i64(tl, th, tl, th, bl, zero); + tcg_gen_add2_i64(tl, th, th, zero, ah, zero); + tcg_gen_add2_i64(tl, dl, tl, th, bh, zero); + tcg_gen_mov_i64(dh, zero); + + tcg_temp_free_i64(tl); + tcg_temp_free_i64(th); + tcg_temp_free_i64(zero); +} + +static DisasJumpType op_vaccc(DisasContext *s, DisasOps *o) +{ + if (get_field(s, m5) != ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec128_4_i64(gen_accc2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3), + get_field(s, v4)); + return DISAS_NEXT; +} + +static DisasJumpType op_vn(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(and, ES_8, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vnc(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(andc, ES_8, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; +} + +static void gen_avg_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_ext_i32_i64(t0, a); + tcg_gen_ext_i32_i64(t1, b); + tcg_gen_add_i64(t0, t0, t1); + tcg_gen_addi_i64(t0, t0, 1); + tcg_gen_shri_i64(t0, t0, 1); + tcg_gen_extrl_i64_i32(d, t0); + + tcg_temp_free(t0); + tcg_temp_free(t1); +} + +static void gen_avg_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl) +{ + TCGv_i64 dh = tcg_temp_new_i64(); + TCGv_i64 ah = tcg_temp_new_i64(); + TCGv_i64 bh = tcg_temp_new_i64(); + + /* extending the sign by one bit is sufficient */ + tcg_gen_extract_i64(ah, al, 63, 1); + tcg_gen_extract_i64(bh, bl, 63, 1); + tcg_gen_add2_i64(dl, dh, al, ah, bl, bh); + gen_addi2_i64(dl, dh, dl, dh, 1); + tcg_gen_extract2_i64(dl, dl, dh, 1); + + tcg_temp_free_i64(dh); + tcg_temp_free_i64(ah); + tcg_temp_free_i64(bh); +} + +static DisasJumpType op_vavg(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + static const GVecGen3 g[4] = { + { .fno = gen_helper_gvec_vavg8, }, + { .fno = gen_helper_gvec_vavg16, }, + { .fni4 = gen_avg_i32, }, + { .fni8 = gen_avg_i64, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + gen_gvec_3(get_field(s, v1), get_field(s, v2), + get_field(s, v3), &g[es]); + return DISAS_NEXT; +} + +static void gen_avgl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(t0, a); + tcg_gen_extu_i32_i64(t1, b); + tcg_gen_add_i64(t0, t0, t1); + tcg_gen_addi_i64(t0, t0, 1); + 
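/* the 33 bit intermediate (a + b + 1) cannot overflow in 64 bits */ +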
tcg_gen_shri_i64(t0, t0, 1); + tcg_gen_extrl_i64_i32(d, t0); + + tcg_temp_free(t0); + tcg_temp_free(t1); +} + +static void gen_avgl_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl) +{ + TCGv_i64 dh = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_const_i64(0); + + tcg_gen_add2_i64(dl, dh, al, zero, bl, zero); + gen_addi2_i64(dl, dh, dl, dh, 1); + tcg_gen_extract2_i64(dl, dl, dh, 1); + + tcg_temp_free_i64(dh); + tcg_temp_free_i64(zero); +} + +static DisasJumpType op_vavgl(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + static const GVecGen3 g[4] = { + { .fno = gen_helper_gvec_vavgl8, }, + { .fno = gen_helper_gvec_vavgl16, }, + { .fni4 = gen_avgl_i32, }, + { .fni8 = gen_avgl_i64, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + gen_gvec_3(get_field(s, v1), get_field(s, v2), + get_field(s, v3), &g[es]); + return DISAS_NEXT; +} + +static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + TCGv_i32 sum = tcg_temp_new_i32(); + int i; + + read_vec_element_i32(sum, get_field(s, v3), 1, ES_32); + for (i = 0; i < 4; i++) { + read_vec_element_i32(tmp, get_field(s, v2), i, ES_32); + tcg_gen_add2_i32(tmp, sum, sum, sum, tmp, tmp); + } + gen_gvec_dup_imm(ES_32, get_field(s, v1), 0); + write_vec_element_i32(sum, get_field(s, v1), 1, ES_32); + + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(sum); + return DISAS_NEXT; +} + +static DisasJumpType op_vec(DisasContext *s, DisasOps *o) +{ + uint8_t es = get_field(s, m3); + const uint8_t enr = NUM_VEC_ELEMENTS(es) / 2 - 1; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + if (s->fields.op2 == 0xdb) { + es |= MO_SIGN; + } + + o->in1 = tcg_temp_new_i64(); + o->in2 = tcg_temp_new_i64(); + read_vec_element_i64(o->in1, get_field(s, v1), enr, es); + read_vec_element_i64(o->in2, get_field(s, v2), enr, es); + return DISAS_NEXT; +} + +static DisasJumpType op_vc(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + TCGCond cond = s->insn->data; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tcg_gen_gvec_cmp(cond, es, + vec_full_reg_offset(get_field(s, v1)), + vec_full_reg_offset(get_field(s, v2)), + vec_full_reg_offset(get_field(s, v3)), 16, 16); + if (get_field(s, m5) & 0x1) { + TCGv_i64 low = tcg_temp_new_i64(); + TCGv_i64 high = tcg_temp_new_i64(); + + read_vec_element_i64(high, get_field(s, v1), 0, ES_64); + read_vec_element_i64(low, get_field(s, v1), 1, ES_64); + gen_op_update2_cc_i64(s, CC_OP_VC, low, high); + + tcg_temp_free_i64(low); + tcg_temp_free_i64(high); + } + return DISAS_NEXT; +} + +static void gen_clz_i32(TCGv_i32 d, TCGv_i32 a) +{ + tcg_gen_clzi_i32(d, a, 32); +} + +static void gen_clz_i64(TCGv_i64 d, TCGv_i64 a) +{ + tcg_gen_clzi_i64(d, a, 64); +} + +static DisasJumpType op_vclz(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + static const GVecGen2 g[4] = { + { .fno = gen_helper_gvec_vclz8, }, + { .fno = gen_helper_gvec_vclz16, }, + { .fni4 = gen_clz_i32, }, + { .fni8 = gen_clz_i64, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]); + return DISAS_NEXT; +} + +static void gen_ctz_i32(TCGv_i32 d, TCGv_i32 a) +{ + tcg_gen_ctzi_i32(d, a, 32); +} + +static void gen_ctz_i64(TCGv_i64 d, TCGv_i64 a) +{ + tcg_gen_ctzi_i64(d, a, 64); +} + +static DisasJumpType op_vctz(DisasContext *s, 
DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + static const GVecGen2 g[4] = { + { .fno = gen_helper_gvec_vctz8, }, + { .fno = gen_helper_gvec_vctz16, }, + { .fni4 = gen_ctz_i32, }, + { .fni8 = gen_ctz_i64, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]); + return DISAS_NEXT; +} + +static DisasJumpType op_vx(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(xor, ES_8, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vgfm(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + static const GVecGen3 g[4] = { + { .fno = gen_helper_gvec_vgfm8, }, + { .fno = gen_helper_gvec_vgfm16, }, + { .fno = gen_helper_gvec_vgfm32, }, + { .fno = gen_helper_gvec_vgfm64, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + gen_gvec_3(get_field(s, v1), get_field(s, v2), + get_field(s, v3), &g[es]); + return DISAS_NEXT; +} + +static DisasJumpType op_vgfma(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m5); + static const GVecGen4 g[4] = { + { .fno = gen_helper_gvec_vgfma8, }, + { .fno = gen_helper_gvec_vgfma16, }, + { .fno = gen_helper_gvec_vgfma32, }, + { .fno = gen_helper_gvec_vgfma64, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + gen_gvec_4(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), &g[es]); + return DISAS_NEXT; +} + +static DisasJumpType op_vlc(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_fn_2(neg, es, get_field(s, v1), get_field(s, v2)); + return DISAS_NEXT; +} + +static DisasJumpType op_vlp(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_fn_2(abs, es, get_field(s, v1), get_field(s, v2)); + return DISAS_NEXT; +} + +static DisasJumpType op_vmx(DisasContext *s, DisasOps *o) +{ + const uint8_t v1 = get_field(s, v1); + const uint8_t v2 = get_field(s, v2); + const uint8_t v3 = get_field(s, v3); + const uint8_t es = get_field(s, m4); + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (s->fields.op2) { + case 0xff: + gen_gvec_fn_3(smax, es, v1, v2, v3); + break; + case 0xfd: + gen_gvec_fn_3(umax, es, v1, v2, v3); + break; + case 0xfe: + gen_gvec_fn_3(smin, es, v1, v2, v3); + break; + case 0xfc: + gen_gvec_fn_3(umin, es, v1, v2, v3); + break; + default: + g_assert_not_reached(); + } + return DISAS_NEXT; +} + +static void gen_mal_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + + tcg_gen_mul_i32(t0, a, b); + tcg_gen_add_i32(d, t0, c); + + tcg_temp_free_i32(t0); +} + +static void gen_mah_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_ext_i32_i64(t0, a); + tcg_gen_ext_i32_i64(t1, b); + tcg_gen_ext_i32_i64(t2, c); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_add_i64(t0, t0, t2); + tcg_gen_extrh_i64_i32(d, t0); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); +} + +static void gen_malh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c) +{ + TCGv_i64 t0 
= tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(t0, a); + tcg_gen_extu_i32_i64(t1, b); + tcg_gen_extu_i32_i64(t2, c); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_add_i64(t0, t0, t2); + tcg_gen_extrh_i64_i32(d, t0); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); +} + +static DisasJumpType op_vma(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m5); + static const GVecGen4 g_vmal[3] = { + { .fno = gen_helper_gvec_vmal8, }, + { .fno = gen_helper_gvec_vmal16, }, + { .fni4 = gen_mal_i32, }, + }; + static const GVecGen4 g_vmah[3] = { + { .fno = gen_helper_gvec_vmah8, }, + { .fno = gen_helper_gvec_vmah16, }, + { .fni4 = gen_mah_i32, }, + }; + static const GVecGen4 g_vmalh[3] = { + { .fno = gen_helper_gvec_vmalh8, }, + { .fno = gen_helper_gvec_vmalh16, }, + { .fni4 = gen_malh_i32, }, + }; + static const GVecGen4 g_vmae[3] = { + { .fno = gen_helper_gvec_vmae8, }, + { .fno = gen_helper_gvec_vmae16, }, + { .fno = gen_helper_gvec_vmae32, }, + }; + static const GVecGen4 g_vmale[3] = { + { .fno = gen_helper_gvec_vmale8, }, + { .fno = gen_helper_gvec_vmale16, }, + { .fno = gen_helper_gvec_vmale32, }, + }; + static const GVecGen4 g_vmao[3] = { + { .fno = gen_helper_gvec_vmao8, }, + { .fno = gen_helper_gvec_vmao16, }, + { .fno = gen_helper_gvec_vmao32, }, + }; + static const GVecGen4 g_vmalo[3] = { + { .fno = gen_helper_gvec_vmalo8, }, + { .fno = gen_helper_gvec_vmalo16, }, + { .fno = gen_helper_gvec_vmalo32, }, + }; + const GVecGen4 *fn; + + if (es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (s->fields.op2) { + case 0xaa: + fn = &g_vmal[es]; + break; + case 0xab: + fn = &g_vmah[es]; + break; + case 0xa9: + fn = &g_vmalh[es]; + break; + case 0xae: + fn = &g_vmae[es]; + break; + case 0xac: + fn = &g_vmale[es]; + break; + case 0xaf: + fn = &g_vmao[es]; + break; + case 0xad: + fn = &g_vmalo[es]; + break; + default: + g_assert_not_reached(); + } + + gen_gvec_4(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), fn); + return DISAS_NEXT; +} + +static void gen_mh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_muls2_i32(t, d, a, b); + tcg_temp_free_i32(t); +} + +static void gen_mlh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_mulu2_i32(t, d, a, b); + tcg_temp_free_i32(t); +} + +static DisasJumpType op_vm(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + static const GVecGen3 g_vmh[3] = { + { .fno = gen_helper_gvec_vmh8, }, + { .fno = gen_helper_gvec_vmh16, }, + { .fni4 = gen_mh_i32, }, + }; + static const GVecGen3 g_vmlh[3] = { + { .fno = gen_helper_gvec_vmlh8, }, + { .fno = gen_helper_gvec_vmlh16, }, + { .fni4 = gen_mlh_i32, }, + }; + static const GVecGen3 g_vme[3] = { + { .fno = gen_helper_gvec_vme8, }, + { .fno = gen_helper_gvec_vme16, }, + { .fno = gen_helper_gvec_vme32, }, + }; + static const GVecGen3 g_vmle[3] = { + { .fno = gen_helper_gvec_vmle8, }, + { .fno = gen_helper_gvec_vmle16, }, + { .fno = gen_helper_gvec_vmle32, }, + }; + static const GVecGen3 g_vmo[3] = { + { .fno = gen_helper_gvec_vmo8, }, + { .fno = gen_helper_gvec_vmo16, }, + { .fno = gen_helper_gvec_vmo32, }, + }; + static const GVecGen3 g_vmlo[3] = { + { .fno = gen_helper_gvec_vmlo8, }, + { .fno = gen_helper_gvec_vmlo16, }, + { .fno = gen_helper_gvec_vmlo32, }, + }; + const GVecGen3 *fn; + + if (es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); 
+ return DISAS_NORETURN; + } + + switch (s->fields.op2) { + case 0xa2: + gen_gvec_fn_3(mul, es, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; + case 0xa3: + fn = &g_vmh[es]; + break; + case 0xa1: + fn = &g_vmlh[es]; + break; + case 0xa6: + fn = &g_vme[es]; + break; + case 0xa4: + fn = &g_vmle[es]; + break; + case 0xa7: + fn = &g_vmo[es]; + break; + case 0xa5: + fn = &g_vmlo[es]; + break; + default: + g_assert_not_reached(); + } + + gen_gvec_3(get_field(s, v1), get_field(s, v2), + get_field(s, v3), fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vmsl(DisasContext *s, DisasOps *o) +{ + TCGv_i64 l1, h1, l2, h2; + + if (get_field(s, m5) != ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + l1 = tcg_temp_new_i64(); + h1 = tcg_temp_new_i64(); + l2 = tcg_temp_new_i64(); + h2 = tcg_temp_new_i64(); + + /* Multiply both even elements from v2 and v3 */ + read_vec_element_i64(l1, get_field(s, v2), 0, ES_64); + read_vec_element_i64(h1, get_field(s, v3), 0, ES_64); + tcg_gen_mulu2_i64(l1, h1, l1, h1); + /* Shift result left by one (x2) if requested */ + if (extract32(get_field(s, m6), 3, 1)) { + tcg_gen_add2_i64(l1, h1, l1, h1, l1, h1); + } + + /* Multiply both odd elements from v2 and v3 */ + read_vec_element_i64(l2, get_field(s, v2), 1, ES_64); + read_vec_element_i64(h2, get_field(s, v3), 1, ES_64); + tcg_gen_mulu2_i64(l2, h2, l2, h2); + /* Shift result left by one (x2) if requested */ + if (extract32(get_field(s, m6), 2, 1)) { + tcg_gen_add2_i64(l2, h2, l2, h2, l2, h2); + } + + /* Add both intermediate results */ + tcg_gen_add2_i64(l1, h1, l1, h1, l2, h2); + /* Add whole v4 */ + read_vec_element_i64(h2, get_field(s, v4), 0, ES_64); + read_vec_element_i64(l2, get_field(s, v4), 1, ES_64); + tcg_gen_add2_i64(l1, h1, l1, h1, l2, h2); + + /* Store final result into v1. 
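+ * As a single 128 bit quantity, h1:l1 now holds the sum of both
+ * (optionally doubled) products and v4.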
*/ + write_vec_element_i64(h1, get_field(s, v1), 0, ES_64); + write_vec_element_i64(l1, get_field(s, v1), 1, ES_64); + + tcg_temp_free_i64(l1); + tcg_temp_free_i64(h1); + tcg_temp_free_i64(l2); + tcg_temp_free_i64(h2); + return DISAS_NEXT; +} + +static DisasJumpType op_vnn(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(nand, ES_8, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vno(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(nor, ES_8, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vnx(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(eqv, ES_8, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vo(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(or, ES_8, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_voc(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(orc, ES_8, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + static const GVecGen2 g[4] = { + { .fno = gen_helper_gvec_vpopct8, }, + { .fno = gen_helper_gvec_vpopct16, }, + { .fni4 = tcg_gen_ctpop_i32, }, + { .fni8 = tcg_gen_ctpop_i64, }, + }; + + if (es > ES_64 || (es != ES_8 && !s390_has_feat(S390_FEAT_VECTOR_ENH))) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]); + return DISAS_NEXT; +} + +static void gen_rim_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_rotli_i32(t, a, c & 31); + tcg_gen_and_i32(t, t, b); + tcg_gen_andc_i32(d, d, b); + tcg_gen_or_i32(d, d, t); + + tcg_temp_free_i32(t); +} + +static void gen_rim_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, int64_t c) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_rotli_i64(t, a, c & 63); + tcg_gen_and_i64(t, t, b); + tcg_gen_andc_i64(d, d, b); + tcg_gen_or_i64(d, d, t); + + tcg_temp_free_i64(t); +} + +static DisasJumpType op_verim(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m5); + const uint8_t i4 = get_field(s, i4) & + (NUM_VEC_ELEMENT_BITS(es) - 1); + static const GVecGen3i g[4] = { + { .fno = gen_helper_gvec_verim8, }, + { .fno = gen_helper_gvec_verim16, }, + { .fni4 = gen_rim_i32, + .load_dest = true, }, + { .fni8 = gen_rim_i64, + .load_dest = true, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_3i(get_field(s, v1), get_field(s, v2), + get_field(s, v3), i4, &g[es]); + return DISAS_NEXT; +} + +static DisasJumpType op_vesv(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t v1 = get_field(s, v1); + const uint8_t v2 = get_field(s, v2); + const uint8_t v3 = get_field(s, v3); + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (s->fields.op2) { + case 0x70: + gen_gvec_fn_3(shlv, es, v1, v2, v3); + break; + case 0x73: + gen_gvec_fn_3(rotlv, es, v1, v2, v3); + break; + case 0x7a: + gen_gvec_fn_3(sarv, es, v1, v2, v3); + break; + case 0x78: + gen_gvec_fn_3(shrv, es, v1, v2, v3); + break; + default: + g_assert_not_reached(); + } + return DISAS_NEXT; +} + +static DisasJumpType op_ves(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t d2 = get_field(s, d2) 
& + (NUM_VEC_ELEMENT_BITS(es) - 1); + const uint8_t v1 = get_field(s, v1); + const uint8_t v3 = get_field(s, v3); + TCGv_i32 shift; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (likely(!get_field(s, b2))) { + switch (s->fields.op2) { + case 0x30: + gen_gvec_fn_2i(shli, es, v1, v3, d2); + break; + case 0x33: + gen_gvec_fn_2i(rotli, es, v1, v3, d2); + break; + case 0x3a: + gen_gvec_fn_2i(sari, es, v1, v3, d2); + break; + case 0x38: + gen_gvec_fn_2i(shri, es, v1, v3, d2); + break; + default: + g_assert_not_reached(); + } + } else { + shift = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(shift, o->addr1); + tcg_gen_andi_i32(shift, shift, NUM_VEC_ELEMENT_BITS(es) - 1); + switch (s->fields.op2) { + case 0x30: + gen_gvec_fn_2s(shls, es, v1, v3, shift); + break; + case 0x33: + gen_gvec_fn_2s(rotls, es, v1, v3, shift); + break; + case 0x3a: + gen_gvec_fn_2s(sars, es, v1, v3, shift); + break; + case 0x38: + gen_gvec_fn_2s(shrs, es, v1, v3, shift); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_i32(shift); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vsl(DisasContext *s, DisasOps *o) +{ + TCGv_i64 shift = tcg_temp_new_i64(); + + read_vec_element_i64(shift, get_field(s, v3), 7, ES_8); + if (s->fields.op2 == 0x74) { + tcg_gen_andi_i64(shift, shift, 0x7); + } else { + tcg_gen_andi_i64(shift, shift, 0x78); + } + + gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), + shift, 0, gen_helper_gvec_vsl); + tcg_temp_free_i64(shift); + return DISAS_NEXT; +} + +static DisasJumpType op_vsldb(DisasContext *s, DisasOps *o) +{ + const uint8_t i4 = get_field(s, i4) & 0xf; + const int left_shift = (i4 & 7) * 8; + const int right_shift = 64 - left_shift; + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + if ((i4 & 8) == 0) { + read_vec_element_i64(t0, get_field(s, v2), 0, ES_64); + read_vec_element_i64(t1, get_field(s, v2), 1, ES_64); + read_vec_element_i64(t2, get_field(s, v3), 0, ES_64); + } else { + read_vec_element_i64(t0, get_field(s, v2), 1, ES_64); + read_vec_element_i64(t1, get_field(s, v3), 0, ES_64); + read_vec_element_i64(t2, get_field(s, v3), 1, ES_64); + } + tcg_gen_extract2_i64(t0, t1, t0, right_shift); + tcg_gen_extract2_i64(t1, t2, t1, right_shift); + write_vec_element_i64(t0, get_field(s, v1), 0, ES_64); + write_vec_element_i64(t1, get_field(s, v1), 1, ES_64); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); + return DISAS_NEXT; +} + +static DisasJumpType op_vsra(DisasContext *s, DisasOps *o) +{ + TCGv_i64 shift = tcg_temp_new_i64(); + + read_vec_element_i64(shift, get_field(s, v3), 7, ES_8); + if (s->fields.op2 == 0x7e) { + tcg_gen_andi_i64(shift, shift, 0x7); + } else { + tcg_gen_andi_i64(shift, shift, 0x78); + } + + gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), + shift, 0, gen_helper_gvec_vsra); + tcg_temp_free_i64(shift); + return DISAS_NEXT; +} + +static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o) +{ + TCGv_i64 shift = tcg_temp_new_i64(); + + read_vec_element_i64(shift, get_field(s, v3), 7, ES_8); + if (s->fields.op2 == 0x7c) { + tcg_gen_andi_i64(shift, shift, 0x7); + } else { + tcg_gen_andi_i64(shift, shift, 0x78); + } + + gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), + shift, 0, gen_helper_gvec_vsrl); + tcg_temp_free_i64(shift); + return DISAS_NEXT; +} + +static DisasJumpType op_vs(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + + if (es > ES_128) { + gen_program_exception(s, 
PGM_SPECIFICATION); + return DISAS_NORETURN; + } else if (es == ES_128) { + gen_gvec128_3_i64(tcg_gen_sub2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; + } + gen_gvec_fn_3(sub, es, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static void gen_scbi_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_setcond_i32(TCG_COND_GEU, d, a, b); +} + +static void gen_scbi_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_setcond_i64(TCG_COND_GEU, d, a, b); +} + +static void gen_scbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) +{ + TCGv_i64 th = tcg_temp_new_i64(); + TCGv_i64 tl = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_const_i64(0); + + tcg_gen_sub2_i64(tl, th, al, zero, bl, zero); + tcg_gen_andi_i64(th, th, 1); + tcg_gen_sub2_i64(tl, th, ah, zero, th, zero); + tcg_gen_sub2_i64(tl, th, tl, th, bh, zero); + /* "invert" the result: -1 -> 0; 0 -> 1 */ + tcg_gen_addi_i64(dl, th, 1); + tcg_gen_mov_i64(dh, zero); + + tcg_temp_free_i64(th); + tcg_temp_free_i64(tl); + tcg_temp_free_i64(zero); +} + +static DisasJumpType op_vscbi(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + static const GVecGen3 g[4] = { + { .fno = gen_helper_gvec_vscbi8, }, + { .fno = gen_helper_gvec_vscbi16, }, + { .fni4 = gen_scbi_i32, }, + { .fni8 = gen_scbi_i64, }, + }; + + if (es > ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } else if (es == ES_128) { + gen_gvec128_3_i64(gen_scbi2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; + } + gen_gvec_3(get_field(s, v1), get_field(s, v2), + get_field(s, v3), &g[es]); + return DISAS_NEXT; +} + +static void gen_sbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, + TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch) +{ + TCGv_i64 tl = tcg_temp_new_i64(); + TCGv_i64 th = tcg_temp_new_i64(); + + tcg_gen_not_i64(tl, bl); + tcg_gen_not_i64(th, bh); + gen_ac2_i64(dl, dh, al, ah, tl, th, cl, ch); + tcg_temp_free_i64(tl); + tcg_temp_free_i64(th); +} + +static DisasJumpType op_vsbi(DisasContext *s, DisasOps *o) +{ + if (get_field(s, m5) != ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec128_4_i64(gen_sbi2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3), + get_field(s, v4)); + return DISAS_NEXT; +} + +static void gen_sbcbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, + TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch) +{ + TCGv_i64 th = tcg_temp_new_i64(); + TCGv_i64 tl = tcg_temp_new_i64(); + + tcg_gen_not_i64(tl, bl); + tcg_gen_not_i64(th, bh); + gen_accc2_i64(dl, dh, al, ah, tl, th, cl, ch); + + tcg_temp_free_i64(tl); + tcg_temp_free_i64(th); +} + +static DisasJumpType op_vsbcbi(DisasContext *s, DisasOps *o) +{ + if (get_field(s, m5) != ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec128_4_i64(gen_sbcbi2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3), + get_field(s, v4)); + return DISAS_NEXT; +} + +static DisasJumpType op_vsumg(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + TCGv_i64 sum, tmp; + uint8_t dst_idx; + + if (es == ES_8 || es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + sum = tcg_temp_new_i64(); + tmp = tcg_temp_new_i64(); + for (dst_idx = 0; dst_idx < 2; dst_idx++) { + uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 2; + const uint8_t max_idx = idx + 
NUM_VEC_ELEMENTS(es) / 2 - 1; + + read_vec_element_i64(sum, get_field(s, v3), max_idx, es); + for (; idx <= max_idx; idx++) { + read_vec_element_i64(tmp, get_field(s, v2), idx, es); + tcg_gen_add_i64(sum, sum, tmp); + } + write_vec_element_i64(sum, get_field(s, v1), dst_idx, ES_64); + } + tcg_temp_free_i64(sum); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vsumq(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t max_idx = NUM_VEC_ELEMENTS(es) - 1; + TCGv_i64 sumh, suml, zero, tmpl; + uint8_t idx; + + if (es < ES_32 || es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + sumh = tcg_const_i64(0); + suml = tcg_temp_new_i64(); + zero = tcg_const_i64(0); + tmpl = tcg_temp_new_i64(); + + read_vec_element_i64(suml, get_field(s, v3), max_idx, es); + for (idx = 0; idx <= max_idx; idx++) { + read_vec_element_i64(tmpl, get_field(s, v2), idx, es); + tcg_gen_add2_i64(suml, sumh, suml, sumh, tmpl, zero); + } + write_vec_element_i64(sumh, get_field(s, v1), 0, ES_64); + write_vec_element_i64(suml, get_field(s, v1), 1, ES_64); + + tcg_temp_free_i64(sumh); + tcg_temp_free_i64(suml); + tcg_temp_free_i64(zero); + tcg_temp_free_i64(tmpl); + return DISAS_NEXT; +} + +static DisasJumpType op_vsum(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + TCGv_i32 sum, tmp; + uint8_t dst_idx; + + if (es > ES_16) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + sum = tcg_temp_new_i32(); + tmp = tcg_temp_new_i32(); + for (dst_idx = 0; dst_idx < 4; dst_idx++) { + uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 4; + const uint8_t max_idx = idx + NUM_VEC_ELEMENTS(es) / 4 - 1; + + read_vec_element_i32(sum, get_field(s, v3), max_idx, es); + for (; idx <= max_idx; idx++) { + read_vec_element_i32(tmp, get_field(s, v2), idx, es); + tcg_gen_add_i32(sum, sum, tmp); + } + write_vec_element_i32(sum, get_field(s, v1), dst_idx, ES_32); + } + tcg_temp_free_i32(sum); + tcg_temp_free_i32(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vtm(DisasContext *s, DisasOps *o) +{ + gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), + cpu_env, 0, gen_helper_gvec_vtm); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_vfae(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + static gen_helper_gvec_3 * const g[3] = { + gen_helper_gvec_vfae8, + gen_helper_gvec_vfae16, + gen_helper_gvec_vfae32, + }; + static gen_helper_gvec_3_ptr * const g_cc[3] = { + gen_helper_gvec_vfae_cc8, + gen_helper_gvec_vfae_cc16, + gen_helper_gvec_vfae_cc32, + }; + if (es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), cpu_env, m5, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), m5, g[es]); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vfee(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + static gen_helper_gvec_3 * const g[3] = { + gen_helper_gvec_vfee8, + gen_helper_gvec_vfee16, + gen_helper_gvec_vfee32, + }; + static gen_helper_gvec_3_ptr * const g_cc[3] = { + gen_helper_gvec_vfee_cc8, + gen_helper_gvec_vfee_cc16, + gen_helper_gvec_vfee_cc32, + }; + + if (es > ES_32 || m5 & ~0x3) { + gen_program_exception(s, PGM_SPECIFICATION); + return 
DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), cpu_env, m5, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), m5, g[es]); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vfene(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + static gen_helper_gvec_3 * const g[3] = { + gen_helper_gvec_vfene8, + gen_helper_gvec_vfene16, + gen_helper_gvec_vfene32, + }; + static gen_helper_gvec_3_ptr * const g_cc[3] = { + gen_helper_gvec_vfene_cc8, + gen_helper_gvec_vfene_cc16, + gen_helper_gvec_vfene_cc32, + }; + + if (es > ES_32 || m5 & ~0x3) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), cpu_env, m5, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), m5, g[es]); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vistr(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + static gen_helper_gvec_2 * const g[3] = { + gen_helper_gvec_vistr8, + gen_helper_gvec_vistr16, + gen_helper_gvec_vistr32, + }; + static gen_helper_gvec_2_ptr * const g_cc[3] = { + gen_helper_gvec_vistr_cc8, + gen_helper_gvec_vistr_cc16, + gen_helper_gvec_vistr_cc32, + }; + + if (es > ES_32 || m5 & ~0x1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), + cpu_env, 0, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_2_ool(get_field(s, v1), get_field(s, v2), 0, + g[es]); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m5); + const uint8_t m6 = get_field(s, m6); + static gen_helper_gvec_4 * const g[3] = { + gen_helper_gvec_vstrc8, + gen_helper_gvec_vstrc16, + gen_helper_gvec_vstrc32, + }; + static gen_helper_gvec_4 * const g_rt[3] = { + gen_helper_gvec_vstrc_rt8, + gen_helper_gvec_vstrc_rt16, + gen_helper_gvec_vstrc_rt32, + }; + static gen_helper_gvec_4_ptr * const g_cc[3] = { + gen_helper_gvec_vstrc_cc8, + gen_helper_gvec_vstrc_cc16, + gen_helper_gvec_vstrc_cc32, + }; + static gen_helper_gvec_4_ptr * const g_cc_rt[3] = { + gen_helper_gvec_vstrc_cc_rt8, + gen_helper_gvec_vstrc_cc_rt16, + gen_helper_gvec_vstrc_cc_rt32, + }; + + if (es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m6, 0, 1)) { + if (extract32(m6, 2, 1)) { + gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), + cpu_env, m6, g_cc_rt[es]); + } else { + gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), + cpu_env, m6, g_cc[es]); + } + set_cc_static(s); + } else { + if (extract32(m6, 2, 1)) { + gen_gvec_4_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), + m6, g_rt[es]); + } else { + gen_gvec_4_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), + m6, g[es]); + } + } + return DISAS_NEXT; +} + +static DisasJumpType op_vfa(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + gen_helper_gvec_3_ptr *fn = NULL; + + switch (s->fields.op2) { + case 0xe3: + switch (fpf) { + case FPF_SHORT: 
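+        /* short and extended BFP formats require the vector-enhancements facility */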
+ if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfa32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfa64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfa128; + } + break; + default: + break; + } + break; + case 0xe5: + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfd32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfd64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfd128; + } + break; + default: + break; + } + break; + case 0xe7: + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfm32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfm64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfm128; + } + break; + default: + break; + } + break; + case 0xe2: + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfs32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfs64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfs128; + } + break; + default: + break; + } + break; + default: + g_assert_not_reached(); + } + + if (!fn || extract32(m5, 0, 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), cpu_env, m5, fn); + return DISAS_NEXT; +} + +static DisasJumpType op_wfc(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s, m3); + const uint8_t m4 = get_field(s, m4); + gen_helper_gvec_2_ptr *fn = NULL; + + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_wfk32; + if (s->fields.op2 == 0xcb) { + fn = gen_helper_gvec_wfc32; + } + } + break; + case FPF_LONG: + fn = gen_helper_gvec_wfk64; + if (s->fields.op2 == 0xcb) { + fn = gen_helper_gvec_wfc64; + } + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_wfk128; + if (s->fields.op2 == 0xcb) { + fn = gen_helper_gvec_wfc128; + } + } + break; + default: + break; + }; + + if (!fn || m4) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, 0, fn); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_vfc(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + const uint8_t m6 = get_field(s, m6); + const bool cs = extract32(m6, 0, 1); + const bool sq = extract32(m5, 2, 1); + gen_helper_gvec_3_ptr *fn = NULL; + + switch (s->fields.op2) { + case 0xe8: + switch (fpf) { + case FPF_SHORT: + fn = cs ? gen_helper_gvec_vfce32_cc : gen_helper_gvec_vfce32; + break; + case FPF_LONG: + fn = cs ? gen_helper_gvec_vfce64_cc : gen_helper_gvec_vfce64; + break; + case FPF_EXT: + fn = cs ? gen_helper_gvec_vfce128_cc : gen_helper_gvec_vfce128; + break; + default: + break; + } + break; + case 0xeb: + switch (fpf) { + case FPF_SHORT: + fn = cs ? gen_helper_gvec_vfch32_cc : gen_helper_gvec_vfch32; + break; + case FPF_LONG: + fn = cs ? gen_helper_gvec_vfch64_cc : gen_helper_gvec_vfch64; + break; + case FPF_EXT: + fn = cs ? gen_helper_gvec_vfch128_cc : gen_helper_gvec_vfch128; + break; + default: + break; + } + break; + case 0xea: + switch (fpf) { + case FPF_SHORT: + fn = cs ? 
gen_helper_gvec_vfche32_cc : gen_helper_gvec_vfche32; + break; + case FPF_LONG: + fn = cs ? gen_helper_gvec_vfche64_cc : gen_helper_gvec_vfche64; + break; + case FPF_EXT: + fn = cs ? gen_helper_gvec_vfche128_cc : gen_helper_gvec_vfche128; + break; + default: + break; + } + break; + default: + g_assert_not_reached(); + } + + if (!fn || extract32(m5, 0, 2) || extract32(m6, 1, 3) || + (!s390_has_feat(S390_FEAT_VECTOR_ENH) && (fpf != FPF_LONG || sq))) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3), + cpu_env, m5, fn); + if (cs) { + set_cc_static(s); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s, m3); + const uint8_t m4 = get_field(s, m4); + const uint8_t erm = get_field(s, m5); + gen_helper_gvec_2_ptr *fn = NULL; + + + switch (s->fields.op2) { + case 0xc3: + if (fpf == FPF_LONG) { + fn = gen_helper_gvec_vcdg64; + } + break; + case 0xc1: + if (fpf == FPF_LONG) { + fn = gen_helper_gvec_vcdlg64; + } + break; + case 0xc2: + if (fpf == FPF_LONG) { + fn = gen_helper_gvec_vcgd64; + } + break; + case 0xc0: + if (fpf == FPF_LONG) { + fn = gen_helper_gvec_vclgd64; + } + break; + case 0xc7: + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfi32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfi64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfi128; + } + break; + default: + break; + } + break; + case 0xc5: + switch (fpf) { + case FPF_LONG: + fn = gen_helper_gvec_vflr64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vflr128; + } + break; + default: + break; + } + break; + default: + g_assert_not_reached(); + } + + if (!fn || extract32(m4, 0, 2) || erm > 7 || erm == 2) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, + deposit32(m4, 4, 4, erm), fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vfll(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s, m3); + const uint8_t m4 = get_field(s, m4); + gen_helper_gvec_2_ptr *fn = NULL; + + switch (fpf) { + case FPF_SHORT: + fn = gen_helper_gvec_vfll32; + break; + case FPF_LONG: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfll64; + } + break; + default: + break; + } + + if (!fn || extract32(m4, 0, 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, m4, fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vfmax(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s, m4); + const uint8_t m6 = get_field(s, m6); + const uint8_t m5 = get_field(s, m5); + gen_helper_gvec_3_ptr *fn; + + if (m6 == 5 || m6 == 6 || m6 == 7 || m6 > 13) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (fpf) { + case FPF_SHORT: + if (s->fields.op2 == 0xef) { + fn = gen_helper_gvec_vfmax32; + } else { + fn = gen_helper_gvec_vfmin32; + } + break; + case FPF_LONG: + if (s->fields.op2 == 0xef) { + fn = gen_helper_gvec_vfmax64; + } else { + fn = gen_helper_gvec_vfmin64; + } + break; + case FPF_EXT: + if (s->fields.op2 == 0xef) { + fn = gen_helper_gvec_vfmax128; + } else { + fn = gen_helper_gvec_vfmin128; + } + break; + default: + gen_program_exception(s, PGM_SPECIFICATION); + return 
DISAS_NORETURN; + } + + gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3), + cpu_env, deposit32(m5, 4, 4, m6), fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vfma(DisasContext *s, DisasOps *o) +{ + const uint8_t m5 = get_field(s, m5); + const uint8_t fpf = get_field(s, m6); + gen_helper_gvec_4_ptr *fn = NULL; + + switch (s->fields.op2) { + case 0x8f: + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfma32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfma64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfma128; + } + break; + default: + break; + } + break; + case 0x8e: + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfms32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfms64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfms128; + } + break; + default: + break; + } + break; + case 0x9f: + switch (fpf) { + case FPF_SHORT: + fn = gen_helper_gvec_vfnma32; + break; + case FPF_LONG: + fn = gen_helper_gvec_vfnma64; + break; + case FPF_EXT: + fn = gen_helper_gvec_vfnma128; + break; + default: + break; + } + break; + case 0x9e: + switch (fpf) { + case FPF_SHORT: + fn = gen_helper_gvec_vfnms32; + break; + case FPF_LONG: + fn = gen_helper_gvec_vfnms64; + break; + case FPF_EXT: + fn = gen_helper_gvec_vfnms128; + break; + default: + break; + } + break; + default: + g_assert_not_reached(); + } + + if (!fn || extract32(m5, 0, 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), cpu_env, m5, fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vfpso(DisasContext *s, DisasOps *o) +{ + const uint8_t v1 = get_field(s, v1); + const uint8_t v2 = get_field(s, v2); + const uint8_t fpf = get_field(s, m3); + const uint8_t m4 = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + const bool se = extract32(m4, 3, 1); + TCGv_i64 tmp; + + if ((fpf != FPF_LONG && !s390_has_feat(S390_FEAT_VECTOR_ENH)) || + extract32(m4, 0, 3) || m5 > 2) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (fpf) { + case FPF_SHORT: + if (!se) { + switch (m5) { + case 0: + /* sign bit is inverted (complement) */ + gen_gvec_fn_2i(xori, ES_32, v1, v2, 1ull << 31); + break; + case 1: + /* sign bit is set to one (negative) */ + gen_gvec_fn_2i(ori, ES_32, v1, v2, 1ull << 31); + break; + case 2: + /* sign bit is set to zero (positive) */ + gen_gvec_fn_2i(andi, ES_32, v1, v2, (1ull << 31) - 1); + break; + } + return DISAS_NEXT; + } + break; + case FPF_LONG: + if (!se) { + switch (m5) { + case 0: + /* sign bit is inverted (complement) */ + gen_gvec_fn_2i(xori, ES_64, v1, v2, 1ull << 63); + break; + case 1: + /* sign bit is set to one (negative) */ + gen_gvec_fn_2i(ori, ES_64, v1, v2, 1ull << 63); + break; + case 2: + /* sign bit is set to zero (positive) */ + gen_gvec_fn_2i(andi, ES_64, v1, v2, (1ull << 63) - 1); + break; + } + return DISAS_NEXT; + } + break; + case FPF_EXT: + /* Only a single element. */ + break; + default: + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* With a single element, we are only interested in bit 0. 
*/
+    tmp = tcg_temp_new_i64();
+    read_vec_element_i64(tmp, v2, 0, ES_64);
+    switch (m5) {
+    case 0:
+        /* sign bit is inverted (complement) */
+        tcg_gen_xori_i64(tmp, tmp, 1ull << 63);
+        break;
+    case 1:
+        /* sign bit is set to one (negative) */
+        tcg_gen_ori_i64(tmp, tmp, 1ull << 63);
+        break;
+    case 2:
+        /* sign bit is set to zero (positive) */
+        tcg_gen_andi_i64(tmp, tmp, (1ull << 63) - 1);
+        break;
+    }
+    write_vec_element_i64(tmp, v1, 0, ES_64);
+
+    if (fpf == FPF_EXT) {
+        read_vec_element_i64(tmp, v2, 1, ES_64);
+        write_vec_element_i64(tmp, v1, 1, ES_64);
+    }
+
+    tcg_temp_free_i64(tmp);
+
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vfsq(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s, m3);
+    const uint8_t m4 = get_field(s, m4);
+    gen_helper_gvec_2_ptr *fn = NULL;
+
+    switch (fpf) {
+    case FPF_SHORT:
+        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+            fn = gen_helper_gvec_vfsq32;
+        }
+        break;
+    case FPF_LONG:
+        fn = gen_helper_gvec_vfsq64;
+        break;
+    case FPF_EXT:
+        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+            fn = gen_helper_gvec_vfsq128;
+        }
+        break;
+    default:
+        break;
+    }
+
+    if (!fn || extract32(m4, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, m4, fn);
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vftci(DisasContext *s, DisasOps *o)
+{
+    const uint16_t i3 = get_field(s, i3);
+    const uint8_t fpf = get_field(s, m4);
+    const uint8_t m5 = get_field(s, m5);
+    gen_helper_gvec_2_ptr *fn = NULL;
+
+    switch (fpf) {
+    case FPF_SHORT:
+        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+            fn = gen_helper_gvec_vftci32;
+        }
+        break;
+    case FPF_LONG:
+        fn = gen_helper_gvec_vftci64;
+        break;
+    case FPF_EXT:
+        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+            fn = gen_helper_gvec_vftci128;
+        }
+        break;
+    default:
+        break;
+    }
+
+    if (!fn || extract32(m5, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
+                   deposit32(m5, 4, 12, i3), fn);
+    set_cc_static(s);
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/tcg/vec.h b/target/s390x/tcg/vec.h
new file mode 100644
index 0000000000..a6e361869b
--- /dev/null
+++ b/target/s390x/tcg/vec.h
@@ -0,0 +1,141 @@
+/*
+ * QEMU TCG support -- s390x vector utilities
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef S390X_VEC_H
+#define S390X_VEC_H
+
+#include "tcg/tcg.h"
+
+typedef union S390Vector {
+    uint64_t doubleword[2];
+    uint32_t word[4];
+    uint16_t halfword[8];
+    uint8_t byte[16];
+} S390Vector;
+
+/*
+ * Each vector is stored as two 64bit host values. So when talking about
+ * byte/halfword/word numbers, we have to take care of proper translation
+ * between element numbers.
+ * + * Big Endian (target/possible host) + * B: [ 0][ 1][ 2][ 3][ 4][ 5][ 6][ 7] - [ 8][ 9][10][11][12][13][14][15] + * HW: [ 0][ 1][ 2][ 3] - [ 4][ 5][ 6][ 7] + * W: [ 0][ 1] - [ 2][ 3] + * DW: [ 0] - [ 1] + * + * Little Endian (possible host) + * B: [ 7][ 6][ 5][ 4][ 3][ 2][ 1][ 0] - [15][14][13][12][11][10][ 9][ 8] + * HW: [ 3][ 2][ 1][ 0] - [ 7][ 6][ 5][ 4] + * W: [ 1][ 0] - [ 3][ 2] + * DW: [ 0] - [ 1] + */ +#ifndef HOST_WORDS_BIGENDIAN +#define H1(x) ((x) ^ 7) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) +#else +#define H1(x) (x) +#define H2(x) (x) +#define H4(x) (x) +#endif + +static inline uint8_t s390_vec_read_element8(const S390Vector *v, uint8_t enr) +{ + g_assert(enr < 16); + return v->byte[H1(enr)]; +} + +static inline uint16_t s390_vec_read_element16(const S390Vector *v, uint8_t enr) +{ + g_assert(enr < 8); + return v->halfword[H2(enr)]; +} + +static inline uint32_t s390_vec_read_element32(const S390Vector *v, uint8_t enr) +{ + g_assert(enr < 4); + return v->word[H4(enr)]; +} + +static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr) +{ + g_assert(enr < 2); + return v->doubleword[enr]; +} + +static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr, + uint8_t es) +{ + switch (es) { + case MO_8: + return s390_vec_read_element8(v, enr); + case MO_16: + return s390_vec_read_element16(v, enr); + case MO_32: + return s390_vec_read_element32(v, enr); + case MO_64: + return s390_vec_read_element64(v, enr); + default: + g_assert_not_reached(); + } +} + +static inline void s390_vec_write_element8(S390Vector *v, uint8_t enr, + uint8_t data) +{ + g_assert(enr < 16); + v->byte[H1(enr)] = data; +} + +static inline void s390_vec_write_element16(S390Vector *v, uint8_t enr, + uint16_t data) +{ + g_assert(enr < 8); + v->halfword[H2(enr)] = data; +} + +static inline void s390_vec_write_element32(S390Vector *v, uint8_t enr, + uint32_t data) +{ + g_assert(enr < 4); + v->word[H4(enr)] = data; +} + +static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr, + uint64_t data) +{ + g_assert(enr < 2); + v->doubleword[enr] = data; +} + +static inline void s390_vec_write_element(S390Vector *v, uint8_t enr, + uint8_t es, uint64_t data) +{ + switch (es) { + case MO_8: + s390_vec_write_element8(v, enr, data); + break; + case MO_16: + s390_vec_write_element16(v, enr, data); + break; + case MO_32: + s390_vec_write_element32(v, enr, data); + break; + case MO_64: + s390_vec_write_element64(v, enr, data); + break; + default: + g_assert_not_reached(); + } +} + +#endif /* S390X_VEC_H */ diff --git a/target/s390x/tcg/vec_fpu_helper.c b/target/s390x/tcg/vec_fpu_helper.c new file mode 100644 index 0000000000..1a77993471 --- /dev/null +++ b/target/s390x/tcg/vec_fpu_helper.c @@ -0,0 +1,1072 @@ +/* + * QEMU TCG support -- s390x vector floating point instruction support + * + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "vec.h" +#include "tcg_s390x.h" +#include "tcg/tcg-gvec-desc.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "fpu/softfloat.h" + +#define VIC_INVALID 0x1 +#define VIC_DIVBYZERO 0x2 +#define VIC_OVERFLOW 0x3 +#define VIC_UNDERFLOW 0x4 +#define VIC_INEXACT 0x5 + +/* returns the VEX. 
If the VEX is 0, there is no trap */ +static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC, + uint8_t *vec_exc) +{ + uint8_t vece_exc = 0, trap_exc; + unsigned qemu_exc; + + /* Retrieve and clear the softfloat exceptions */ + qemu_exc = env->fpu_status.float_exception_flags; + if (qemu_exc == 0) { + return 0; + } + env->fpu_status.float_exception_flags = 0; + + vece_exc = s390_softfloat_exc_to_ieee(qemu_exc); + + /* Add them to the vector-wide s390x exception bits */ + *vec_exc |= vece_exc; + + /* Check for traps and construct the VXC */ + trap_exc = vece_exc & env->fpc >> 24; + if (trap_exc) { + if (trap_exc & S390_IEEE_MASK_INVALID) { + return enr << 4 | VIC_INVALID; + } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) { + return enr << 4 | VIC_DIVBYZERO; + } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) { + return enr << 4 | VIC_OVERFLOW; + } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) { + return enr << 4 | VIC_UNDERFLOW; + } else if (!XxC) { + g_assert(trap_exc & S390_IEEE_MASK_INEXACT); + /* inexact has lowest priority on traps */ + return enr << 4 | VIC_INEXACT; + } + } + return 0; +} + +static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc, + uintptr_t retaddr) +{ + if (vxc) { + /* on traps, the fpc flags are not updated, instruction is suppressed */ + tcg_s390_vector_exception(env, vxc, retaddr); + } + if (vec_exc) { + /* indicate exceptions for all elements combined */ + env->fpc |= vec_exc << 16; + } +} + +static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr) +{ + return make_float32(s390_vec_read_element32(v, enr)); +} + +static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr) +{ + return make_float64(s390_vec_read_element64(v, enr)); +} + +static float128 s390_vec_read_float128(const S390Vector *v) +{ + return make_float128(s390_vec_read_element64(v, 0), + s390_vec_read_element64(v, 1)); +} + +static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data) +{ + return s390_vec_write_element32(v, enr, data); +} + +static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data) +{ + return s390_vec_write_element64(v, enr, data); +} + +static void s390_vec_write_float128(S390Vector *v, float128 data) +{ + s390_vec_write_element64(v, 0, data.high); + s390_vec_write_element64(v, 1, data.low); +} + +typedef float32 (*vop32_2_fn)(float32 a, float_status *s); +static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, + bool s, bool XxC, uint8_t erm, vop32_2_fn fn, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i, old_mode; + + old_mode = s390_swap_bfp_rounding_mode(env, erm); + for (i = 0; i < 4; i++) { + const float32 a = s390_vec_read_float32(v2, i); + + s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status)); + vxc = check_ieee_exc(env, i, XxC, &vec_exc); + if (s || vxc) { + break; + } + } + s390_restore_bfp_rounding_mode(env, old_mode); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +typedef float64 (*vop64_2_fn)(float64 a, float_status *s); +static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, + bool s, bool XxC, uint8_t erm, vop64_2_fn fn, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i, old_mode; + + old_mode = s390_swap_bfp_rounding_mode(env, erm); + for (i = 0; i < 2; i++) { + const float64 a = s390_vec_read_float64(v2, i); + + s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status)); + vxc = check_ieee_exc(env, i, XxC, &vec_exc); + if (s || vxc) { + 
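+            /* stop after element 0 for single-element ops, or once a trap is pending */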
break; + } + } + s390_restore_bfp_rounding_mode(env, old_mode); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +typedef float128 (*vop128_2_fn)(float128 a, float_status *s); +static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, + bool s, bool XxC, uint8_t erm, vop128_2_fn fn, + uintptr_t retaddr) +{ + const float128 a = s390_vec_read_float128(v2); + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int old_mode; + + old_mode = s390_swap_bfp_rounding_mode(env, erm); + s390_vec_write_float128(&tmp, fn(a, &env->fpu_status)); + vxc = check_ieee_exc(env, 0, XxC, &vec_exc); + s390_restore_bfp_rounding_mode(env, old_mode); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +static float64 vcdg64(float64 a, float_status *s) +{ + return int64_to_float64(a, s); +} + +static float64 vcdlg64(float64 a, float_status *s) +{ + return uint64_to_float64(a, s); +} + +static float64 vcgd64(float64 a, float_status *s) +{ + const float64 tmp = float64_to_int64(a, s); + + return float64_is_any_nan(a) ? INT64_MIN : tmp; +} + +static float64 vclgd64(float64 a, float_status *s) +{ + const float64 tmp = float64_to_uint64(a, s); + + return float64_is_any_nan(a) ? 0 : tmp; +} + +#define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \ +void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \ + uint32_t desc) \ +{ \ + const uint8_t erm = extract32(simd_data(desc), 4, 4); \ + const bool se = extract32(simd_data(desc), 3, 1); \ + const bool XxC = extract32(simd_data(desc), 2, 1); \ + \ + vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \ +} + +#define DEF_GVEC_VOP2_64(NAME) \ +DEF_GVEC_VOP2_FN(NAME, NAME##64, 64) + +#define DEF_GVEC_VOP2(NAME, OP) \ +DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \ +DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \ +DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128) + +DEF_GVEC_VOP2_64(vcdg) +DEF_GVEC_VOP2_64(vcdlg) +DEF_GVEC_VOP2_64(vcgd) +DEF_GVEC_VOP2_64(vclgd) +DEF_GVEC_VOP2(vfi, round_to_int) +DEF_GVEC_VOP2(vfsq, sqrt) + +typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s); +static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vop32_3_fn fn, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 4; i++) { + const float32 a = s390_vec_read_float32(v2, i); + const float32 b = s390_vec_read_float32(v3, i); + + s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status)); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s); +static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vop64_3_fn fn, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 2; i++) { + const float64 a = s390_vec_read_float64(v2, i); + const float64 b = s390_vec_read_float64(v3, i); + + s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status)); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s); +static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vop128_3_fn fn, + uintptr_t retaddr) +{ + const float128 a = s390_vec_read_float128(v2); + const 
float128 b = s390_vec_read_float128(v3); + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + + s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status)); + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +#define DEF_GVEC_VOP3_B(NAME, OP, BITS) \ +void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool se = extract32(simd_data(desc), 3, 1); \ + \ + vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \ +} + +#define DEF_GVEC_VOP3(NAME, OP) \ +DEF_GVEC_VOP3_B(NAME, OP, 32) \ +DEF_GVEC_VOP3_B(NAME, OP, 64) \ +DEF_GVEC_VOP3_B(NAME, OP, 128) + +DEF_GVEC_VOP3(vfa, add) +DEF_GVEC_VOP3(vfs, sub) +DEF_GVEC_VOP3(vfd, div) +DEF_GVEC_VOP3(vfm, mul) + +static int wfc32(const S390Vector *v1, const S390Vector *v2, + CPUS390XState *env, bool signal, uintptr_t retaddr) +{ + /* only the zero-indexed elements are compared */ + const float32 a = s390_vec_read_float32(v1, 0); + const float32 b = s390_vec_read_float32(v2, 0); + uint8_t vxc, vec_exc = 0; + int cmp; + + if (signal) { + cmp = float32_compare(a, b, &env->fpu_status); + } else { + cmp = float32_compare_quiet(a, b, &env->fpu_status); + } + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + + return float_comp_to_cc(env, cmp); +} + +static int wfc64(const S390Vector *v1, const S390Vector *v2, + CPUS390XState *env, bool signal, uintptr_t retaddr) +{ + /* only the zero-indexed elements are compared */ + const float64 a = s390_vec_read_float64(v1, 0); + const float64 b = s390_vec_read_float64(v2, 0); + uint8_t vxc, vec_exc = 0; + int cmp; + + if (signal) { + cmp = float64_compare(a, b, &env->fpu_status); + } else { + cmp = float64_compare_quiet(a, b, &env->fpu_status); + } + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + + return float_comp_to_cc(env, cmp); +} + +static int wfc128(const S390Vector *v1, const S390Vector *v2, + CPUS390XState *env, bool signal, uintptr_t retaddr) +{ + /* only the zero-indexed elements are compared */ + const float128 a = s390_vec_read_float128(v1); + const float128 b = s390_vec_read_float128(v2); + uint8_t vxc, vec_exc = 0; + int cmp; + + if (signal) { + cmp = float128_compare(a, b, &env->fpu_status); + } else { + cmp = float128_compare_quiet(a, b, &env->fpu_status); + } + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + + return float_comp_to_cc(env, cmp); +} + +#define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \ +void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \ +} + +#define DEF_GVEC_WFC(NAME, SIGNAL) \ + DEF_GVEC_WFC_B(NAME, SIGNAL, 32) \ + DEF_GVEC_WFC_B(NAME, SIGNAL, 64) \ + DEF_GVEC_WFC_B(NAME, SIGNAL, 128) + +DEF_GVEC_WFC(wfc, false) +DEF_GVEC_WFC(wfk, true) + +typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status); +static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int match = 0; + int i; + + for (i = 0; i < 4; i++) { + const float32 a = s390_vec_read_float32(v2, i); + const float32 b = s390_vec_read_float32(v3, i); + + /* swap the order of the parameters, so we can use existing functions */ + if (fn(b, a, &env->fpu_status)) { + match++; + 
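+            /* matching elements are flagged with all bits set */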
s390_vec_write_element32(&tmp, i, -1u); + } + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; + if (match) { + return s || match == 4 ? 0 : 1; + } + return 3; +} + +typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); +static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int match = 0; + int i; + + for (i = 0; i < 2; i++) { + const float64 a = s390_vec_read_float64(v2, i); + const float64 b = s390_vec_read_float64(v3, i); + + /* swap the order of the parameters, so we can use existing functions */ + if (fn(b, a, &env->fpu_status)) { + match++; + s390_vec_write_element64(&tmp, i, -1ull); + } + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; + if (match) { + return s || match == 2 ? 0 : 1; + } + return 3; +} + +typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status); +static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr) +{ + const float128 a = s390_vec_read_float128(v2); + const float128 b = s390_vec_read_float128(v3); + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + bool match = false; + + /* swap the order of the parameters, so we can use existing functions */ + if (fn(b, a, &env->fpu_status)) { + match = true; + s390_vec_write_element64(&tmp, 0, -1ull); + s390_vec_write_element64(&tmp, 1, -1ull); + } + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; + return match ? 0 : 3; +} + +#define DEF_GVEC_VFC_B(NAME, OP, BITS) \ +void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool se = extract32(simd_data(desc), 3, 1); \ + const bool sq = extract32(simd_data(desc), 2, 1); \ + vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \ + \ + vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ +} \ + \ +void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool se = extract32(simd_data(desc), 3, 1); \ + const bool sq = extract32(simd_data(desc), 2, 1); \ + vfc##BITS##_fn fn = sq ? 
float##BITS##_##OP : float##BITS##_##OP##_quiet; \ + \ + env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ +} + +#define DEF_GVEC_VFC(NAME, OP) \ +DEF_GVEC_VFC_B(NAME, OP, 32) \ +DEF_GVEC_VFC_B(NAME, OP, 64) \ +DEF_GVEC_VFC_B(NAME, OP, 128) \ + +DEF_GVEC_VFC(vfce, eq) +DEF_GVEC_VFC(vfch, lt) +DEF_GVEC_VFC(vfche, le) + +void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const bool s = extract32(simd_data(desc), 3, 1); + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 2; i++) { + /* load from even element */ + const float32 a = s390_vec_read_element32(v2, i * 2); + const uint64_t ret = float32_to_float64(a, &env->fpu_status); + + s390_vec_write_element64(&tmp, i, ret); + /* indicate the source element */ + vxc = check_ieee_exc(env, i * 2, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, GETPC()); + *(S390Vector *)v1 = tmp; +} + +void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + /* load from even element */ + const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0), + &env->fpu_status); + uint8_t vxc, vec_exc = 0; + + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, GETPC()); + s390_vec_write_float128(v1, ret); +} + +void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool s = extract32(simd_data(desc), 3, 1); + const bool XxC = extract32(simd_data(desc), 2, 1); + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i, old_mode; + + old_mode = s390_swap_bfp_rounding_mode(env, erm); + for (i = 0; i < 2; i++) { + float64 a = s390_vec_read_element64(v2, i); + uint32_t ret = float64_to_float32(a, &env->fpu_status); + + /* place at even element */ + s390_vec_write_element32(&tmp, i * 2, ret); + /* indicate the source element */ + vxc = check_ieee_exc(env, i, XxC, &vec_exc); + if (s || vxc) { + break; + } + } + s390_restore_bfp_rounding_mode(env, old_mode); + handle_ieee_exc(env, vxc, vec_exc, GETPC()); + *(S390Vector *)v1 = tmp; +} + +void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + uint8_t vxc, vec_exc = 0; + int old_mode; + float64 ret; + + old_mode = s390_swap_bfp_rounding_mode(env, erm); + ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status); + vxc = check_ieee_exc(env, 0, XxC, &vec_exc); + s390_restore_bfp_rounding_mode(env, old_mode); + handle_ieee_exc(env, vxc, vec_exc, GETPC()); + + /* place at even element, odd element is unpredictable */ + s390_vec_write_float64(v1, 0, ret); +} + +static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + const S390Vector *v4, CPUS390XState *env, bool s, int flags, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 4; i++) { + const float32 a = s390_vec_read_float32(v2, i); + const float32 b = s390_vec_read_float32(v3, i); + const float32 c = s390_vec_read_float32(v4, i); + float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status); + + s390_vec_write_float32(&tmp, i, ret); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector 
*v3, + const S390Vector *v4, CPUS390XState *env, bool s, int flags, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 2; i++) { + const float64 a = s390_vec_read_float64(v2, i); + const float64 b = s390_vec_read_float64(v3, i); + const float64 c = s390_vec_read_float64(v4, i); + const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status); + + s390_vec_write_float64(&tmp, i, ret); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + const S390Vector *v4, CPUS390XState *env, bool s, int flags, + uintptr_t retaddr) +{ + const float128 a = s390_vec_read_float128(v2); + const float128 b = s390_vec_read_float128(v3); + const float128 c = s390_vec_read_float128(v4); + uint8_t vxc, vec_exc = 0; + float128 ret; + + ret = float128_muladd(a, b, c, flags, &env->fpu_status); + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + s390_vec_write_float128(v1, ret); +} + +#define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \ +void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, CPUS390XState *env, \ + uint32_t desc) \ +{ \ + const bool se = extract32(simd_data(desc), 3, 1); \ + \ + vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \ +} + +#define DEF_GVEC_VFMA(NAME, FLAGS) \ + DEF_GVEC_VFMA_B(NAME, FLAGS, 32) \ + DEF_GVEC_VFMA_B(NAME, FLAGS, 64) \ + DEF_GVEC_VFMA_B(NAME, FLAGS, 128) + +DEF_GVEC_VFMA(vfma, 0) +DEF_GVEC_VFMA(vfms, float_muladd_negate_c) +DEF_GVEC_VFMA(vfnma, float_muladd_negate_result) +DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result) + +void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + uint16_t i3 = extract32(simd_data(desc), 4, 12); + bool s = extract32(simd_data(desc), 3, 1); + int i, match = 0; + + for (i = 0; i < 4; i++) { + float32 a = s390_vec_read_float32(v2, i); + + if (float32_dcmask(env, a) & i3) { + match++; + s390_vec_write_element32(v1, i, -1u); + } else { + s390_vec_write_element32(v1, i, 0); + } + if (s) { + break; + } + } + + if (match == 4 || (s && match)) { + env->cc_op = 0; + } else if (match) { + env->cc_op = 1; + } else { + env->cc_op = 3; + } +} + +void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint16_t i3 = extract32(simd_data(desc), 4, 12); + const bool s = extract32(simd_data(desc), 3, 1); + int i, match = 0; + + for (i = 0; i < 2; i++) { + const float64 a = s390_vec_read_float64(v2, i); + + if (float64_dcmask(env, a) & i3) { + match++; + s390_vec_write_element64(v1, i, -1ull); + } else { + s390_vec_write_element64(v1, i, 0); + } + if (s) { + break; + } + } + + if (match == 2 || (s && match)) { + env->cc_op = 0; + } else if (match) { + env->cc_op = 1; + } else { + env->cc_op = 3; + } +} + +void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const float128 a = s390_vec_read_float128(v2); + uint16_t i3 = extract32(simd_data(desc), 4, 12); + + if (float128_dcmask(env, a) & i3) { + env->cc_op = 0; + s390_vec_write_element64(v1, 0, -1ull); + s390_vec_write_element64(v1, 1, -1ull); + } else { + env->cc_op = 3; + s390_vec_write_element64(v1, 0, 0); + s390_vec_write_element64(v1, 1, 0); + } +} + +typedef enum S390MinMaxType { + S390_MINMAX_TYPE_IEEE = 0, + S390_MINMAX_TYPE_JAVA, + 
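+    /* C-macro, C++ std::min/max and fmin/fmax semantics follow (VFMIN/VFMAX M6 modes) */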
S390_MINMAX_TYPE_C_MACRO, + S390_MINMAX_TYPE_CPP, + S390_MINMAX_TYPE_F, +} S390MinMaxType; + +typedef enum S390MinMaxRes { + S390_MINMAX_RES_MINMAX = 0, + S390_MINMAX_RES_A, + S390_MINMAX_RES_B, + S390_MINMAX_RES_SILENCE_A, + S390_MINMAX_RES_SILENCE_B, +} S390MinMaxRes; + +static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b, + S390MinMaxType type, float_status *s) +{ + const bool neg_a = dcmask_a & DCMASK_NEGATIVE; + const bool nan_a = dcmask_a & DCMASK_NAN; + const bool nan_b = dcmask_b & DCMASK_NAN; + + g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); + + if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { + const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; + const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; + + if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { + s->float_exception_flags |= float_flag_invalid; + } + switch (type) { + case S390_MINMAX_TYPE_JAVA: + if (sig_a) { + return S390_MINMAX_RES_SILENCE_A; + } else if (sig_b) { + return S390_MINMAX_RES_SILENCE_B; + } + return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_F: + return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_C_MACRO: + s->float_exception_flags |= float_flag_invalid; + return S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_CPP: + s->float_exception_flags |= float_flag_invalid; + return S390_MINMAX_RES_A; + default: + g_assert_not_reached(); + } + } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) { + switch (type) { + case S390_MINMAX_TYPE_JAVA: + return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_C_MACRO: + return S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_F: + return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A; + case S390_MINMAX_TYPE_CPP: + return S390_MINMAX_RES_A; + default: + g_assert_not_reached(); + } + } + return S390_MINMAX_RES_MINMAX; +} + +static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b, + S390MinMaxType type, float_status *s) +{ + g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); + + if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { + const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; + const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; + const bool nan_a = dcmask_a & DCMASK_NAN; + const bool nan_b = dcmask_b & DCMASK_NAN; + + if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { + s->float_exception_flags |= float_flag_invalid; + } + switch (type) { + case S390_MINMAX_TYPE_JAVA: + if (sig_a) { + return S390_MINMAX_RES_SILENCE_A; + } else if (sig_b) { + return S390_MINMAX_RES_SILENCE_B; + } + return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_F: + return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_C_MACRO: + s->float_exception_flags |= float_flag_invalid; + return S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_CPP: + s->float_exception_flags |= float_flag_invalid; + return S390_MINMAX_RES_A; + default: + g_assert_not_reached(); + } + } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) { + const bool neg_a = dcmask_a & DCMASK_NEGATIVE; + + switch (type) { + case S390_MINMAX_TYPE_JAVA: + case S390_MINMAX_TYPE_F: + return neg_a ? 
S390_MINMAX_RES_B : S390_MINMAX_RES_A; + case S390_MINMAX_TYPE_C_MACRO: + return S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_CPP: + return S390_MINMAX_RES_A; + default: + g_assert_not_reached(); + } + } + return S390_MINMAX_RES_MINMAX; +} + +static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b, + S390MinMaxType type, bool is_min, + float_status *s) +{ + return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) : + vfmax_res(dcmask_a, dcmask_b, type, s); +} + +static void vfminmax32(S390Vector *v1, const S390Vector *v2, + const S390Vector *v3, CPUS390XState *env, + S390MinMaxType type, bool is_min, bool is_abs, bool se, + uintptr_t retaddr) +{ + float_status *s = &env->fpu_status; + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 4; i++) { + float32 a = s390_vec_read_float32(v2, i); + float32 b = s390_vec_read_float32(v3, i); + float32 result; + + if (type != S390_MINMAX_TYPE_IEEE) { + S390MinMaxRes res; + + if (is_abs) { + a = float32_abs(a); + b = float32_abs(b); + } + + res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b), + type, is_min, s); + switch (res) { + case S390_MINMAX_RES_MINMAX: + result = is_min ? float32_min(a, b, s) : float32_max(a, b, s); + break; + case S390_MINMAX_RES_A: + result = a; + break; + case S390_MINMAX_RES_B: + result = b; + break; + case S390_MINMAX_RES_SILENCE_A: + result = float32_silence_nan(a, s); + break; + case S390_MINMAX_RES_SILENCE_B: + result = float32_silence_nan(b, s); + break; + default: + g_assert_not_reached(); + } + } else if (!is_abs) { + result = is_min ? float32_minnum(a, b, &env->fpu_status) : + float32_maxnum(a, b, &env->fpu_status); + } else { + result = is_min ? float32_minnummag(a, b, &env->fpu_status) : + float32_maxnummag(a, b, &env->fpu_status); + } + + s390_vec_write_float32(&tmp, i, result); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (se || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +static void vfminmax64(S390Vector *v1, const S390Vector *v2, + const S390Vector *v3, CPUS390XState *env, + S390MinMaxType type, bool is_min, bool is_abs, bool se, + uintptr_t retaddr) +{ + float_status *s = &env->fpu_status; + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 2; i++) { + float64 a = s390_vec_read_float64(v2, i); + float64 b = s390_vec_read_float64(v3, i); + float64 result; + + if (type != S390_MINMAX_TYPE_IEEE) { + S390MinMaxRes res; + + if (is_abs) { + a = float64_abs(a); + b = float64_abs(b); + } + + res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b), + type, is_min, s); + switch (res) { + case S390_MINMAX_RES_MINMAX: + result = is_min ? float64_min(a, b, s) : float64_max(a, b, s); + break; + case S390_MINMAX_RES_A: + result = a; + break; + case S390_MINMAX_RES_B: + result = b; + break; + case S390_MINMAX_RES_SILENCE_A: + result = float64_silence_nan(a, s); + break; + case S390_MINMAX_RES_SILENCE_B: + result = float64_silence_nan(b, s); + break; + default: + g_assert_not_reached(); + } + } else if (!is_abs) { + result = is_min ? float64_minnum(a, b, &env->fpu_status) : + float64_maxnum(a, b, &env->fpu_status); + } else { + result = is_min ? 
float64_minnummag(a, b, &env->fpu_status) : + float64_maxnummag(a, b, &env->fpu_status); + } + + s390_vec_write_float64(&tmp, i, result); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (se || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +static void vfminmax128(S390Vector *v1, const S390Vector *v2, + const S390Vector *v3, CPUS390XState *env, + S390MinMaxType type, bool is_min, bool is_abs, bool se, + uintptr_t retaddr) +{ + float128 a = s390_vec_read_float128(v2); + float128 b = s390_vec_read_float128(v3); + float_status *s = &env->fpu_status; + uint8_t vxc, vec_exc = 0; + float128 result; + + if (type != S390_MINMAX_TYPE_IEEE) { + S390MinMaxRes res; + + if (is_abs) { + a = float128_abs(a); + b = float128_abs(b); + } + + res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b), + type, is_min, s); + switch (res) { + case S390_MINMAX_RES_MINMAX: + result = is_min ? float128_min(a, b, s) : float128_max(a, b, s); + break; + case S390_MINMAX_RES_A: + result = a; + break; + case S390_MINMAX_RES_B: + result = b; + break; + case S390_MINMAX_RES_SILENCE_A: + result = float128_silence_nan(a, s); + break; + case S390_MINMAX_RES_SILENCE_B: + result = float128_silence_nan(b, s); + break; + default: + g_assert_not_reached(); + } + } else if (!is_abs) { + result = is_min ? float128_minnum(a, b, &env->fpu_status) : + float128_maxnum(a, b, &env->fpu_status); + } else { + result = is_min ? float128_minnummag(a, b, &env->fpu_status) : + float128_maxnummag(a, b, &env->fpu_status); + } + + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + s390_vec_write_float128(v1, result); +} + +#define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS) \ +void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool se = extract32(simd_data(desc), 3, 1); \ + uint8_t type = extract32(simd_data(desc), 4, 4); \ + bool is_abs = false; \ + \ + if (type >= 8) { \ + is_abs = true; \ + type -= 8; \ + } \ + \ + vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC()); \ +} + +#define DEF_GVEC_VFMINMAX(NAME, IS_MIN) \ + DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32) \ + DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64) \ + DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128) + +DEF_GVEC_VFMINMAX(vfmax, false) +DEF_GVEC_VFMINMAX(vfmin, true) diff --git a/target/s390x/tcg/vec_helper.c b/target/s390x/tcg/vec_helper.c new file mode 100644 index 0000000000..ededf13cf0 --- /dev/null +++ b/target/s390x/tcg/vec_helper.c @@ -0,0 +1,214 @@ +/* + * QEMU TCG support -- s390x vector support instructions + * + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "vec.h" +#include "tcg/tcg.h" +#include "tcg/tcg-gvec-desc.h" +#include "exec/helper-proto.h" +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" + +void HELPER(gvec_vbperm)(void *v1, const void *v2, const void *v3, + uint32_t desc) +{ + S390Vector tmp = {}; + uint16_t result = 0; + int i; + + for (i = 0; i < 16; i++) { + const uint8_t bit_nr = s390_vec_read_element8(v3, i); + uint16_t bit; + + if (bit_nr >= 128) { + continue; + } + bit = (s390_vec_read_element8(v2, bit_nr / 8) + >> (7 - (bit_nr % 8))) & 1; + result |= (bit << (15 - i)); + } + s390_vec_write_element16(&tmp, 3, result); + *(S390Vector *)v1 = tmp; +} + +void HELPER(vll)(CPUS390XState *env, void *v1, uint64_t addr, uint64_t bytes) +{ + if (likely(bytes >= 16)) { + uint64_t t0, t1; + + t0 = cpu_ldq_data_ra(env, addr, GETPC()); + addr = wrap_address(env, addr + 8); + t1 = cpu_ldq_data_ra(env, addr, GETPC()); + s390_vec_write_element64(v1, 0, t0); + s390_vec_write_element64(v1, 1, t1); + } else { + S390Vector tmp = {}; + int i; + + for (i = 0; i < bytes; i++) { + uint8_t byte = cpu_ldub_data_ra(env, addr, GETPC()); + + s390_vec_write_element8(&tmp, i, byte); + addr = wrap_address(env, addr + 1); + } + *(S390Vector *)v1 = tmp; + } +} + +#define DEF_VPK_HFN(BITS, TBITS) \ +typedef uint##TBITS##_t (*vpk##BITS##_fn)(uint##BITS##_t, int *); \ +static int vpk##BITS##_hfn(S390Vector *v1, const S390Vector *v2, \ + const S390Vector *v3, vpk##BITS##_fn fn) \ +{ \ + int i, saturated = 0; \ + S390Vector tmp; \ + \ + for (i = 0; i < (128 / TBITS); i++) { \ + uint##BITS##_t src; \ + \ + if (i < (128 / BITS)) { \ + src = s390_vec_read_element##BITS(v2, i); \ + } else { \ + src = s390_vec_read_element##BITS(v3, i - (128 / BITS)); \ + } \ + s390_vec_write_element##TBITS(&tmp, i, fn(src, &saturated)); \ + } \ + *v1 = tmp; \ + return saturated; \ +} +DEF_VPK_HFN(64, 32) +DEF_VPK_HFN(32, 16) +DEF_VPK_HFN(16, 8) + +#define DEF_VPK(BITS, TBITS) \ +static uint##TBITS##_t vpk##BITS##e(uint##BITS##_t src, int *saturated) \ +{ \ + return src; \ +} \ +void HELPER(gvec_vpk##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + vpk##BITS##_hfn(v1, v2, v3, vpk##BITS##e); \ +} +DEF_VPK(64, 32) +DEF_VPK(32, 16) +DEF_VPK(16, 8) + +#define DEF_VPKS(BITS, TBITS) \ +static uint##TBITS##_t vpks##BITS##e(uint##BITS##_t src, int *saturated) \ +{ \ + if ((int##BITS##_t)src > INT##TBITS##_MAX) { \ + (*saturated)++; \ + return INT##TBITS##_MAX; \ + } else if ((int##BITS##_t)src < INT##TBITS##_MIN) { \ + (*saturated)++; \ + return INT##TBITS##_MIN; \ + } \ + return src; \ +} \ +void HELPER(gvec_vpks##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e); \ +} \ +void HELPER(gvec_vpks_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + int saturated = vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e); \ + \ + if (saturated == (128 / TBITS)) { \ + env->cc_op = 3; \ + } else if (saturated) { \ + env->cc_op = 1; \ + } else { \ + env->cc_op = 0; \ + } \ +} +DEF_VPKS(64, 32) +DEF_VPKS(32, 16) +DEF_VPKS(16, 8) + +#define DEF_VPKLS(BITS, TBITS) \ +static uint##TBITS##_t vpkls##BITS##e(uint##BITS##_t src, int *saturated) \ +{ \ + if (src > UINT##TBITS##_MAX) { \ + (*saturated)++; \ + return UINT##TBITS##_MAX; \ + } \ + return src; \ +} \ +void HELPER(gvec_vpkls##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + vpk##BITS##_hfn(v1, v2, v3, 
vpkls##BITS##e); \
+} \
+void HELPER(gvec_vpkls_cc##BITS)(void *v1, const void *v2, const void *v3, \
+                                 CPUS390XState *env, uint32_t desc) \
+{ \
+    int saturated = vpk##BITS##_hfn(v1, v2, v3, vpkls##BITS##e); \
+ \
+    if (saturated == (128 / TBITS)) { \
+        env->cc_op = 3; \
+    } else if (saturated) { \
+        env->cc_op = 1; \
+    } else { \
+        env->cc_op = 0; \
+    } \
+}
+DEF_VPKLS(64, 32)
+DEF_VPKLS(32, 16)
+DEF_VPKLS(16, 8)
+
+void HELPER(gvec_vperm)(void *v1, const void *v2, const void *v3,
+                        const void *v4, uint32_t desc)
+{
+    S390Vector tmp;
+    int i;
+
+    for (i = 0; i < 16; i++) {
+        const uint8_t selector = s390_vec_read_element8(v4, i) & 0x1f;
+        uint8_t byte;
+
+        if (selector < 16) {
+            byte = s390_vec_read_element8(v2, selector);
+        } else {
+            byte = s390_vec_read_element8(v3, selector - 16);
+        }
+        s390_vec_write_element8(&tmp, i, byte);
+    }
+    *(S390Vector *)v1 = tmp;
+}
+
+void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr,
+                  uint64_t bytes)
+{
+    /* Probe write access before actually modifying memory */
+    probe_write_access(env, addr, bytes, GETPC());
+
+    if (likely(bytes >= 16)) {
+        cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 0), GETPC());
+        addr = wrap_address(env, addr + 8);
+        cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 1), GETPC());
+    } else {
+        int i;
+
+        for (i = 0; i < bytes; i++) {
+            uint8_t byte = s390_vec_read_element8(v1, i);
+
+            cpu_stb_data_ra(env, addr, byte, GETPC());
+            addr = wrap_address(env, addr + 1);
+        }
+    }
+}
diff --git a/target/s390x/tcg/vec_int_helper.c b/target/s390x/tcg/vec_int_helper.c
new file mode 100644
index 0000000000..5561b3ed90
--- /dev/null
+++ b/target/s390x/tcg/vec_int_helper.c
@@ -0,0 +1,587 @@
+/*
+ * QEMU TCG support -- s390x vector integer instruction support
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "vec.h" +#include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" + +static bool s390_vec_is_zero(const S390Vector *v) +{ + return !v->doubleword[0] && !v->doubleword[1]; +} + +static void s390_vec_xor(S390Vector *res, const S390Vector *a, + const S390Vector *b) +{ + res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; + res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; +} + +static void s390_vec_and(S390Vector *res, const S390Vector *a, + const S390Vector *b) +{ + res->doubleword[0] = a->doubleword[0] & b->doubleword[0]; + res->doubleword[1] = a->doubleword[1] & b->doubleword[1]; +} + +static bool s390_vec_equal(const S390Vector *a, const S390Vector *b) +{ + return a->doubleword[0] == b->doubleword[0] && + a->doubleword[1] == b->doubleword[1]; +} + +static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) +{ + uint64_t tmp; + + g_assert(count < 128); + if (count == 0) { + d->doubleword[0] = a->doubleword[0]; + d->doubleword[1] = a->doubleword[1]; + } else if (count == 64) { + d->doubleword[0] = a->doubleword[1]; + d->doubleword[1] = 0; + } else if (count < 64) { + tmp = extract64(a->doubleword[1], 64 - count, count); + d->doubleword[1] = a->doubleword[1] << count; + d->doubleword[0] = (a->doubleword[0] << count) | tmp; + } else { + d->doubleword[0] = a->doubleword[1] << (count - 64); + d->doubleword[1] = 0; + } +} + +static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count) +{ + uint64_t tmp; + + if (count == 0) { + d->doubleword[0] = a->doubleword[0]; + d->doubleword[1] = a->doubleword[1]; + } else if (count == 64) { + tmp = (int64_t)a->doubleword[0] >> 63; + d->doubleword[1] = a->doubleword[0]; + d->doubleword[0] = tmp; + } else if (count < 64) { + tmp = a->doubleword[1] >> count; + d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); + d->doubleword[0] = (int64_t)a->doubleword[0] >> count; + } else { + tmp = (int64_t)a->doubleword[0] >> 63; + d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64); + d->doubleword[0] = tmp; + } +} + +static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) +{ + uint64_t tmp; + + g_assert(count < 128); + if (count == 0) { + d->doubleword[0] = a->doubleword[0]; + d->doubleword[1] = a->doubleword[1]; + } else if (count == 64) { + d->doubleword[1] = a->doubleword[0]; + d->doubleword[0] = 0; + } else if (count < 64) { + tmp = a->doubleword[1] >> count; + d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); + d->doubleword[0] = a->doubleword[0] >> count; + } else { + d->doubleword[1] = a->doubleword[0] >> (count - 64); + d->doubleword[0] = 0; + } +} +#define DEF_VAVG(BITS) \ +void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ + const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ + \ + s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ + } \ +} +DEF_VAVG(8) +DEF_VAVG(16) + +#define DEF_VAVGL(BITS) \ +void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ + \ + s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ + } \ +} 
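+/*
+ * Both macros round the exact average up: (a + b + 1) >> 1 is evaluated
+ * after integer promotion, so the sum cannot overflow for the 8- and
+ * 16-bit element sizes instantiated here.  A rough scalar equivalent for
+ * one unsigned byte element (hypothetical sketch, not one of the helpers):
+ *
+ *     uint8_t vavgl8(uint8_t a, uint8_t b)
+ *     {
+ *         return ((uint32_t)a + b + 1) >> 1;  // vavgl8(254, 255) == 255
+ *     }
+ */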
+DEF_VAVGL(8)
+DEF_VAVGL(16)
+
+#define DEF_VCLZ(BITS) \
+void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \
+{ \
+    int i; \
+ \
+    for (i = 0; i < (128 / BITS); i++) { \
+        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
+ \
+        /* a is zero-extended to 32 bits, compensate for the excess width */ \
+        s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \
+    } \
+}
+DEF_VCLZ(8)
+DEF_VCLZ(16)
+
+#define DEF_VCTZ(BITS) \
+void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \
+{ \
+    int i; \
+ \
+    for (i = 0; i < (128 / BITS); i++) { \
+        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
+ \
+        s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \
+    } \
+}
+DEF_VCTZ(8)
+DEF_VCTZ(16)
+
+/* like binary multiplication, but XOR instead of addition */
+#define DEF_GALOIS_MULTIPLY(BITS, TBITS) \
+static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \
+                                             uint##TBITS##_t b) \
+{ \
+    uint##TBITS##_t res = 0; \
+ \
+    while (b) { \
+        if (b & 0x1) { \
+            res = res ^ a; \
+        } \
+        a = a << 1; \
+        b = b >> 1; \
+    } \
+    return res; \
+}
+DEF_GALOIS_MULTIPLY(8, 16)
+DEF_GALOIS_MULTIPLY(16, 32)
+DEF_GALOIS_MULTIPLY(32, 64)
+
+static S390Vector galois_multiply64(uint64_t a, uint64_t b)
+{
+    S390Vector res = {};
+    S390Vector va = {
+        .doubleword[1] = a,
+    };
+    S390Vector vb = {
+        .doubleword[1] = b,
+    };
+
+    while (!s390_vec_is_zero(&vb)) {
+        if (vb.doubleword[1] & 0x1) {
+            s390_vec_xor(&res, &res, &va);
+        }
+        s390_vec_shl(&va, &va, 1);
+        s390_vec_shr(&vb, &vb, 1);
+    }
+    return res;
+}
+
+#define DEF_VGFM(BITS, TBITS) \
+void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \
+                             uint32_t desc) \
+{ \
+    int i; \
+ \
+    for (i = 0; i < (128 / TBITS); i++) { \
+        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
+        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
+        uint##TBITS##_t d = galois_multiply##BITS(a, b); \
+ \
+        a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
+        b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
+        d = d ^ galois_multiply##BITS(a, b); \
+        s390_vec_write_element##TBITS(v1, i, d); \
+    } \
+}
+DEF_VGFM(8, 16)
+DEF_VGFM(16, 32)
+DEF_VGFM(32, 64)
+
+void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
+                         uint32_t desc)
+{
+    S390Vector tmp1, tmp2;
+    uint64_t a, b;
+
+    a = s390_vec_read_element64(v2, 0);
+    b = s390_vec_read_element64(v3, 0);
+    tmp1 = galois_multiply64(a, b);
+    a = s390_vec_read_element64(v2, 1);
+    b = s390_vec_read_element64(v3, 1);
+    tmp2 = galois_multiply64(a, b);
+    s390_vec_xor(v1, &tmp1, &tmp2);
+}
+
+#define DEF_VGFMA(BITS, TBITS) \
+void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \
+                              const void *v4, uint32_t desc) \
+{ \
+    int i; \
+ \
+    for (i = 0; i < (128 / TBITS); i++) { \
+        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
+        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
+        uint##TBITS##_t d = galois_multiply##BITS(a, b); \
+ \
+        a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
+        b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
+        d = d ^ galois_multiply##BITS(a, b); \
+        d = d ^ s390_vec_read_element##TBITS(v4, i); \
+        s390_vec_write_element##TBITS(v1, i, d); \
+    } \
+}
+DEF_VGFMA(8, 16)
+DEF_VGFMA(16, 32)
+DEF_VGFMA(32, 64)
+
+void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
+                          const void *v4, uint32_t desc)
+{
+    S390Vector tmp1, tmp2;
+    uint64_t a, b;
+
+    a = s390_vec_read_element64(v2, 0);
+    b = s390_vec_read_element64(v3, 0);
+    tmp1 = galois_multiply64(a, b);
+    a = s390_vec_read_element64(v2, 1);
+    b = s390_vec_read_element64(v3, 1);
+    tmp2 =
galois_multiply64(a, b); + s390_vec_xor(&tmp1, &tmp1, &tmp2); + s390_vec_xor(v1, &tmp1, v4); +} + +#define DEF_VMAL(BITS) \ +void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ + const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ + \ + s390_vec_write_element##BITS(v1, i, a * b + c); \ + } \ +} +DEF_VMAL(8) +DEF_VMAL(16) + +#define DEF_VMAH(BITS) \ +void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ + const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ + const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ + \ + s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ + } \ +} +DEF_VMAH(8) +DEF_VMAH(16) + +#define DEF_VMALH(BITS) \ +void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ + const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ + \ + s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ + } \ +} +DEF_VMALH(8) +DEF_VMALH(16) + +#define DEF_VMAE(BITS, TBITS) \ +void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ + int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ + int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ + int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b + c); \ + } \ +} +DEF_VMAE(8, 16) +DEF_VMAE(16, 32) +DEF_VMAE(32, 64) + +#define DEF_VMALE(BITS, TBITS) \ +void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ + uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ + uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ + uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b + c); \ + } \ +} +DEF_VMALE(8, 16) +DEF_VMALE(16, 32) +DEF_VMALE(32, 64) + +#define DEF_VMAO(BITS, TBITS) \ +void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ + int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ + int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ + int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b + c); \ + } \ +} +DEF_VMAO(8, 16) +DEF_VMAO(16, 32) +DEF_VMAO(32, 64) + +#define DEF_VMALO(BITS, TBITS) \ +void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ + uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ + uint##TBITS##_t b = 
s390_vec_read_element##BITS(v3, j); \ + uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b + c); \ + } \ +} +DEF_VMALO(8, 16) +DEF_VMALO(16, 32) +DEF_VMALO(32, 64) + +#define DEF_VMH(BITS) \ +void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ + const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ + \ + s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ + } \ +} +DEF_VMH(8) +DEF_VMH(16) + +#define DEF_VMLH(BITS) \ +void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ + \ + s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ + } \ +} +DEF_VMLH(8) +DEF_VMLH(16) + +#define DEF_VME(BITS, TBITS) \ +void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ + int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ + int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b); \ + } \ +} +DEF_VME(8, 16) +DEF_VME(16, 32) +DEF_VME(32, 64) + +#define DEF_VMLE(BITS, TBITS) \ +void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ + const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ + const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b); \ + } \ +} +DEF_VMLE(8, 16) +DEF_VMLE(16, 32) +DEF_VMLE(32, 64) + +#define DEF_VMO(BITS, TBITS) \ +void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ + int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ + int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b); \ + } \ +} +DEF_VMO(8, 16) +DEF_VMO(16, 32) +DEF_VMO(32, 64) + +#define DEF_VMLO(BITS, TBITS) \ +void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ + const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ + const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b); \ + } \ +} +DEF_VMLO(8, 16) +DEF_VMLO(16, 32) +DEF_VMLO(32, 64) + +#define DEF_VPOPCT(BITS) \ +void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ + \ + s390_vec_write_element##BITS(v1, i, ctpop32(a)); \ + } \ +} +DEF_VPOPCT(8) +DEF_VPOPCT(16) + +#define DEF_VERIM(BITS) \ +void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const uint8_t count = simd_data(desc); \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \ + const uint##BITS##_t b = 
s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \ + const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \ + \ + s390_vec_write_element##BITS(v1, i, d); \ + } \ +} +DEF_VERIM(8) +DEF_VERIM(16) + +void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, + uint32_t desc) +{ + s390_vec_shl(v1, v2, count); +} + +void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, + uint32_t desc) +{ + s390_vec_sar(v1, v2, count); +} + +void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, + uint32_t desc) +{ + s390_vec_shr(v1, v2, count); +} + +#define DEF_VSCBI(BITS) \ +void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ + \ + s390_vec_write_element##BITS(v1, i, a >= b); \ + } \ +} +DEF_VSCBI(8) +DEF_VSCBI(16) + +void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + S390Vector tmp; + + s390_vec_and(&tmp, v1, v2); + if (s390_vec_is_zero(&tmp)) { + /* Selected bits all zeros; or all mask bits zero */ + env->cc_op = 0; + } else if (s390_vec_equal(&tmp, v2)) { + /* Selected bits all ones */ + env->cc_op = 3; + } else { + /* Selected bits a mix of zeros and ones */ + env->cc_op = 1; + } +} diff --git a/target/s390x/tcg/vec_string_helper.c b/target/s390x/tcg/vec_string_helper.c new file mode 100644 index 0000000000..ac315eb095 --- /dev/null +++ b/target/s390x/tcg/vec_string_helper.c @@ -0,0 +1,473 @@ +/* + * QEMU TCG support -- s390x vector string instruction support + * + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "vec.h" +#include "tcg/tcg.h" +#include "tcg/tcg-gvec-desc.h" +#include "exec/helper-proto.h" + +/* + * Returns a bit set in the MSB of each element that is zero, + * as defined by the mask. + */ +static inline uint64_t zero_search(uint64_t a, uint64_t mask) +{ + return ~(((a & mask) + mask) | a | mask); +} + +/* + * Returns a bit set in the MSB of each element that is not zero, + * as defined by the mask. + */ +static inline uint64_t nonzero_search(uint64_t a, uint64_t mask) +{ + return (((a & mask) + mask) | a) & ~mask; +} + +/* + * Returns the byte offset for the first match, or 16 for no match. + */ +static inline int match_index(uint64_t c0, uint64_t c1) +{ + return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3; +} + +/* + * Returns the number of bits composing one element. + */ +static uint8_t get_element_bits(uint8_t es) +{ + return (1 << es) * BITS_PER_BYTE; +} + +/* + * Returns the bitmask for a single element. + */ +static uint64_t get_single_element_mask(uint8_t es) +{ + return -1ull >> (64 - get_element_bits(es)); +} + +/* + * Returns the bitmask for a single element (excluding the MSB). + */ +static uint64_t get_single_element_lsbs_mask(uint8_t es) +{ + return -1ull >> (65 - get_element_bits(es)); +} + +/* + * Returns the bitmasks for multiple elements (excluding the MSBs). 
+ */ +static uint64_t get_element_lsbs_mask(uint8_t es) +{ + return dup_const(es, get_single_element_lsbs_mask(es)); +} + +static int vfae(void *v1, const void *v2, const void *v3, bool in, + bool rt, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + const int bits = get_element_bits(es); + uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_equal; + int i; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = 0; + e1 = 0; + /* compare against equality with every other element */ + for (i = 0; i < 64; i += bits) { + t0 = rol64(b0, i); + t1 = rol64(b1, i); + e0 |= zero_search(a0 ^ t0, mask); + e0 |= zero_search(a0 ^ t1, mask); + e1 |= zero_search(a1 ^ t0, mask); + e1 |= zero_search(a1 ^ t1, mask); + } + /* invert the result if requested - invert only the MSBs */ + if (in) { + e0 = ~e0 & ~mask; + e1 = ~e1 & ~mask; + } + first_equal = match_index(e0, e1); + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + if (rt) { + e0 = (e0 >> (bits - 1)) * get_single_element_mask(es); + e1 = (e1 >> (bits - 1)) * get_single_element_mask(es); + s390_vec_write_element64(v1, 0, e0); + s390_vec_write_element64(v1, 1, e1); + } else { + s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + } + + if (first_zero == 16 && first_equal == 16) { + return 3; /* no match */ + } else if (first_zero == 16) { + return 1; /* matching elements, no match for zero */ + } else if (first_equal < first_zero) { + return 2; /* matching elements before match for zero */ + } + return 0; /* match for zero */ +} + +#define DEF_VFAE_HELPER(BITS) \ +void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool rt = extract32(simd_data(desc), 2, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ +} +DEF_VFAE_HELPER(8) +DEF_VFAE_HELPER(16) +DEF_VFAE_HELPER(32) + +#define DEF_VFAE_CC_HELPER(BITS) \ +void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool rt = extract32(simd_data(desc), 2, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ +} +DEF_VFAE_CC_HELPER(8) +DEF_VFAE_CC_HELPER(16) +DEF_VFAE_CC_HELPER(32) + +static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0, a1, b0, b1, e0, e1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_equal; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = zero_search(a0 ^ b0, mask); + e1 = zero_search(a1 ^ b1, mask); + first_equal = match_index(e0, e1); + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + if (first_zero == 16 && first_equal == 16) { + return 3; /* no match */ + } else if (first_zero == 16) { + return 1; /* matching elements, no match for zero */ + } else if (first_equal 
< first_zero) { + return 2; /* matching elements before match for zero */ + } + return 0; /* match for zero */ +} + +#define DEF_VFEE_HELPER(BITS) \ +void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfee(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFEE_HELPER(8) +DEF_VFEE_HELPER(16) +DEF_VFEE_HELPER(32) + +#define DEF_VFEE_CC_HELPER(BITS) \ +void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFEE_CC_HELPER(8) +DEF_VFEE_CC_HELPER(16) +DEF_VFEE_CC_HELPER(32) + +static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0, a1, b0, b1, e0, e1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_inequal; + bool smaller = false; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = nonzero_search(a0 ^ b0, mask); + e1 = nonzero_search(a1 ^ b1, mask); + first_inequal = match_index(e0, e1); + + /* identify the smaller element */ + if (first_inequal < 16) { + uint8_t enr = first_inequal / (1 << es); + uint32_t a = s390_vec_read_element(v2, enr, es); + uint32_t b = s390_vec_read_element(v3, enr, es); + + smaller = a < b; + } + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + if (first_zero == 16 && first_inequal == 16) { + return 3; + } else if (first_zero < first_inequal) { + return 0; + } + return smaller ? 
1 : 2; +} + +#define DEF_VFENE_HELPER(BITS) \ +void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfene(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFENE_HELPER(8) +DEF_VFENE_HELPER(16) +DEF_VFENE_HELPER(32) + +#define DEF_VFENE_CC_HELPER(BITS) \ +void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFENE_CC_HELPER(8) +DEF_VFENE_CC_HELPER(16) +DEF_VFENE_CC_HELPER(32) + +static int vistr(void *v1, const void *v2, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0 = s390_vec_read_element64(v2, 0); + uint64_t a1 = s390_vec_read_element64(v2, 1); + uint64_t z; + int cc = 3; + + z = zero_search(a0, mask); + if (z) { + a0 &= ~(-1ull >> clz64(z)); + a1 = 0; + cc = 0; + } else { + z = zero_search(a1, mask); + if (z) { + a1 &= ~(-1ull >> clz64(z)); + cc = 0; + } + } + + s390_vec_write_element64(v1, 0, a0); + s390_vec_write_element64(v1, 1, a1); + return cc; +} + +#define DEF_VISTR_HELPER(BITS) \ +void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc) \ +{ \ + vistr(v1, v2, MO_##BITS); \ +} +DEF_VISTR_HELPER(8) +DEF_VISTR_HELPER(16) +DEF_VISTR_HELPER(32) + +#define DEF_VISTR_CC_HELPER(BITS) \ +void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \ + uint32_t desc) \ +{ \ + env->cc_op = vistr(v1, v2, MO_##BITS); \ +} +DEF_VISTR_CC_HELPER(8) +DEF_VISTR_CC_HELPER(16) +DEF_VISTR_CC_HELPER(32) + +static bool element_compare(uint32_t data, uint32_t l, uint8_t c) +{ + const bool equal = extract32(c, 7, 1); + const bool lower = extract32(c, 6, 1); + const bool higher = extract32(c, 5, 1); + + if (data < l) { + return lower; + } else if (data > l) { + return higher; + } + return equal; +} + +static int vstrc(void *v1, const void *v2, const void *v3, const void *v4, + bool in, bool rt, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0 = s390_vec_read_element64(v2, 0); + uint64_t a1 = s390_vec_read_element64(v2, 1); + int first_zero = 16, first_match = 16; + S390Vector rt_result = {}; + uint64_t z0, z1; + int i, j; + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + for (i = 0; i < 16 / (1 << es); i++) { + const uint32_t data = s390_vec_read_element(v2, i, es); + const int cur_byte = i * (1 << es); + bool any_match = false; + + /* if we don't need a bit vector, we can stop early */ + if (cur_byte == first_zero && !rt) { + break; + } + + for (j = 0; j < 16 / (1 << es); j += 2) { + const uint32_t l1 = s390_vec_read_element(v3, j, es); + const uint32_t l2 = s390_vec_read_element(v3, j + 1, es); + /* we are only interested in the highest byte of each element */ + const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es)); + const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es)); + + if (element_compare(data, l1, c1) && + element_compare(data, l2, c2)) { + any_match = true; + break; + } + } + /* invert the result if requested */ + any_match = in ^ any_match; + + if (any_match) { + /* indicate bit vector if requested */ + if (rt) { + const uint64_t val = -1ull; + + first_match = MIN(cur_byte, first_match); + s390_vec_write_element(&rt_result, i, es, val); + } else { + /* stop on the first match */ + first_match = cur_byte; + break; + } + } + } + + 
if (rt) {
+        *(S390Vector *)v1 = rt_result;
+    } else {
+        s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
+        s390_vec_write_element64(v1, 1, 0);
+    }
+
+    if (first_zero == 16 && first_match == 16) {
+        return 3; /* no match */
+    } else if (first_zero == 16) {
+        return 1; /* matching elements, no match for zero */
+    } else if (first_match < first_zero) {
+        return 2; /* matching elements before match for zero */
+    }
+    return 0; /* match for zero */
+}
+
+#define DEF_VSTRC_HELPER(BITS) \
+void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3, \
+                              const void *v4, uint32_t desc) \
+{ \
+    const bool in = extract32(simd_data(desc), 3, 1); \
+    const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+    vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
+}
+DEF_VSTRC_HELPER(8)
+DEF_VSTRC_HELPER(16)
+DEF_VSTRC_HELPER(32)
+
+#define DEF_VSTRC_RT_HELPER(BITS) \
+void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3, \
+                                 const void *v4, uint32_t desc) \
+{ \
+    const bool in = extract32(simd_data(desc), 3, 1); \
+    const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+    vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
+}
+DEF_VSTRC_RT_HELPER(8)
+DEF_VSTRC_RT_HELPER(16)
+DEF_VSTRC_RT_HELPER(32)
+
+#define DEF_VSTRC_CC_HELPER(BITS) \
+void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3, \
+                                 const void *v4, CPUS390XState *env, \
+                                 uint32_t desc) \
+{ \
+    const bool in = extract32(simd_data(desc), 3, 1); \
+    const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+    env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
+}
+DEF_VSTRC_CC_HELPER(8)
+DEF_VSTRC_CC_HELPER(16)
+DEF_VSTRC_CC_HELPER(32)
+
+#define DEF_VSTRC_CC_RT_HELPER(BITS) \
+void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \
+                                    const void *v4, CPUS390XState *env, \
+                                    uint32_t desc) \
+{ \
+    const bool in = extract32(simd_data(desc), 3, 1); \
+    const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+    env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
+}
+DEF_VSTRC_CC_RT_HELPER(8)
+DEF_VSTRC_CC_RT_HELPER(16)
+DEF_VSTRC_CC_RT_HELPER(32)
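+/*
+ * Closing note on the SWAR idiom shared by the string helpers above:
+ * zero_search() leaves the MSB of every element set iff that element is
+ * zero.  Worked through for byte elements (mask = 0x7f7f7f7f7f7f7f7f,
+ * input value chosen purely for illustration):
+ *
+ *   a                   = 0x4142004344454647  (byte 2 is zero)
+ *   (a & mask) + mask   = 0xc0c17fc2c3c4c5c6  (MSB clear only for the zero byte)
+ *   ... | a | mask      = 0xffff7fffffffffff
+ *   ~(...)              = 0x0000800000000000
+ *
+ * match_index() then converts the leftmost set bit into a byte offset:
+ * clz64(0x0000800000000000) >> 3 == 2.
+ */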