Diffstat (limited to 'target/s390x/tcg')
-rw-r--r-- | target/s390x/tcg/cc_helper.c | 538
-rw-r--r-- | target/s390x/tcg/crypto_helper.c | 61
-rw-r--r-- | target/s390x/tcg/excp_helper.c | 641
-rw-r--r-- | target/s390x/tcg/fpu_helper.c | 976
-rw-r--r-- | target/s390x/tcg/insn-data.def | 1398
-rw-r--r-- | target/s390x/tcg/insn-format.def | 81
-rw-r--r-- | target/s390x/tcg/int_helper.c | 148
-rw-r--r-- | target/s390x/tcg/mem_helper.c | 3008
-rw-r--r-- | target/s390x/tcg/meson.build | 14
-rw-r--r-- | target/s390x/tcg/misc_helper.c | 785
-rw-r--r-- | target/s390x/tcg/s390-tod.h | 29
-rw-r--r-- | target/s390x/tcg/tcg_s390x.h | 24
-rw-r--r-- | target/s390x/tcg/translate.c | 6672
-rw-r--r-- | target/s390x/tcg/translate_vx.c.inc | 3109
-rw-r--r-- | target/s390x/tcg/vec.h | 141
-rw-r--r-- | target/s390x/tcg/vec_fpu_helper.c | 1072
-rw-r--r-- | target/s390x/tcg/vec_helper.c | 214
-rw-r--r-- | target/s390x/tcg/vec_int_helper.c | 587
-rw-r--r-- | target/s390x/tcg/vec_string_helper.c | 473
19 files changed, 19971 insertions, 0 deletions
diff --git a/target/s390x/tcg/cc_helper.c b/target/s390x/tcg/cc_helper.c new file mode 100644 index 0000000000..c2c96c3a3c --- /dev/null +++ b/target/s390x/tcg/cc_helper.c @@ -0,0 +1,538 @@ +/* + * S/390 condition code helper routines + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2009 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "tcg_s390x.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "qemu/host-utils.h" + +/* #define DEBUG_HELPER */ +#ifdef DEBUG_HELPER +#define HELPER_LOG(x...) qemu_log(x) +#else +#define HELPER_LOG(x...) +#endif + +static uint32_t cc_calc_ltgt_32(int32_t src, int32_t dst) +{ + if (src == dst) { + return 0; + } else if (src < dst) { + return 1; + } else { + return 2; + } +} + +static uint32_t cc_calc_ltgt0_32(int32_t dst) +{ + return cc_calc_ltgt_32(dst, 0); +} + +static uint32_t cc_calc_ltgt_64(int64_t src, int64_t dst) +{ + if (src == dst) { + return 0; + } else if (src < dst) { + return 1; + } else { + return 2; + } +} + +static uint32_t cc_calc_ltgt0_64(int64_t dst) +{ + return cc_calc_ltgt_64(dst, 0); +} + +static uint32_t cc_calc_ltugtu_32(uint32_t src, uint32_t dst) +{ + if (src == dst) { + return 0; + } else if (src < dst) { + return 1; + } else { + return 2; + } +} + +static uint32_t cc_calc_ltugtu_64(uint64_t src, uint64_t dst) +{ + if (src == dst) { + return 0; + } else if (src < dst) { + return 1; + } else { + return 2; + } +} + +static uint32_t cc_calc_tm_32(uint32_t val, uint32_t mask) +{ + uint32_t r = val & mask; + + if (r == 0) { + return 0; + } else if (r == mask) { + return 3; + } else { + return 1; + } +} + +static uint32_t cc_calc_tm_64(uint64_t val, uint64_t mask) +{ + uint64_t r = val & mask; + + if (r == 0) { + return 0; + } else if (r == mask) { + return 3; + } else { + int top = clz64(mask); + if ((int64_t)(val << top) < 0) { + return 2; + } else { + return 1; + } + } +} + +static uint32_t cc_calc_nz(uint64_t dst) +{ + return !!dst; +} + +static uint32_t cc_calc_addu(uint64_t carry_out, uint64_t result) +{ + g_assert(carry_out <= 1); + return (result != 0) + 2 * carry_out; +} + +static uint32_t cc_calc_subu(uint64_t borrow_out, uint64_t result) +{ + return cc_calc_addu(borrow_out + 1, result); +} + +static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar) +{ + if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) { + return 3; /* overflow */ + } else { + if (ar < 0) { + return 1; + } else if (ar > 0) { + return 2; + } else { + return 0; + } + } +} + +static uint32_t cc_calc_sub_64(int64_t a1, int64_t a2, int64_t ar) +{ + if ((a1 > 0 && a2 < 0 && ar < 0) || (a1 < 0 && a2 > 0 && ar > 0)) { + return 3; /* overflow */ + } else { + if (ar < 0) { + return 1; + } else if (ar > 0) { + return 2; + } else { + return 0; + } + } +} + +static uint32_t 
cc_calc_abs_64(int64_t dst) +{ + if ((uint64_t)dst == 0x8000000000000000ULL) { + return 3; + } else if (dst) { + return 2; + } else { + return 0; + } +} + +static uint32_t cc_calc_nabs_64(int64_t dst) +{ + return !!dst; +} + +static uint32_t cc_calc_comp_64(int64_t dst) +{ + if ((uint64_t)dst == 0x8000000000000000ULL) { + return 3; + } else if (dst < 0) { + return 1; + } else if (dst > 0) { + return 2; + } else { + return 0; + } +} + + +static uint32_t cc_calc_add_32(int32_t a1, int32_t a2, int32_t ar) +{ + if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) { + return 3; /* overflow */ + } else { + if (ar < 0) { + return 1; + } else if (ar > 0) { + return 2; + } else { + return 0; + } + } +} + +static uint32_t cc_calc_sub_32(int32_t a1, int32_t a2, int32_t ar) +{ + if ((a1 > 0 && a2 < 0 && ar < 0) || (a1 < 0 && a2 > 0 && ar > 0)) { + return 3; /* overflow */ + } else { + if (ar < 0) { + return 1; + } else if (ar > 0) { + return 2; + } else { + return 0; + } + } +} + +static uint32_t cc_calc_abs_32(int32_t dst) +{ + if ((uint32_t)dst == 0x80000000UL) { + return 3; + } else if (dst) { + return 2; + } else { + return 0; + } +} + +static uint32_t cc_calc_nabs_32(int32_t dst) +{ + return !!dst; +} + +static uint32_t cc_calc_comp_32(int32_t dst) +{ + if ((uint32_t)dst == 0x80000000UL) { + return 3; + } else if (dst < 0) { + return 1; + } else if (dst > 0) { + return 2; + } else { + return 0; + } +} + +/* calculate condition code for insert character under mask insn */ +static uint32_t cc_calc_icm(uint64_t mask, uint64_t val) +{ + if ((val & mask) == 0) { + return 0; + } else { + int top = clz64(mask); + if ((int64_t)(val << top) < 0) { + return 1; + } else { + return 2; + } + } +} + +static uint32_t cc_calc_sla_32(uint32_t src, int shift) +{ + uint32_t mask = ((1U << shift) - 1U) << (32 - shift); + uint32_t sign = 1U << 31; + uint32_t match; + int32_t r; + + /* Check if the sign bit stays the same. */ + if (src & sign) { + match = mask; + } else { + match = 0; + } + if ((src & mask) != match) { + /* Overflow. */ + return 3; + } + + r = ((src << shift) & ~sign) | (src & sign); + if (r == 0) { + return 0; + } else if (r < 0) { + return 1; + } + return 2; +} + +static uint32_t cc_calc_sla_64(uint64_t src, int shift) +{ + uint64_t mask = ((1ULL << shift) - 1ULL) << (64 - shift); + uint64_t sign = 1ULL << 63; + uint64_t match; + int64_t r; + + /* Check if the sign bit stays the same. */ + if (src & sign) { + match = mask; + } else { + match = 0; + } + if ((src & mask) != match) { + /* Overflow. */ + return 3; + } + + r = ((src << shift) & ~sign) | (src & sign); + if (r == 0) { + return 0; + } else if (r < 0) { + return 1; + } + return 2; +} + +static uint32_t cc_calc_flogr(uint64_t dst) +{ + return dst ? 2 : 0; +} + +static uint32_t cc_calc_lcbb(uint64_t dst) +{ + return dst == 16 ? 
0 : 3; +} + +static uint32_t cc_calc_vc(uint64_t low, uint64_t high) +{ + if (high == -1ull && low == -1ull) { + /* all elements match */ + return 0; + } else if (high == 0 && low == 0) { + /* no elements match */ + return 3; + } else { + /* some elements but not all match */ + return 1; + } +} + +static uint32_t cc_calc_muls_32(int64_t res) +{ + const int64_t tmp = res >> 31; + + if (!res) { + return 0; + } else if (tmp && tmp != -1) { + return 3; + } else if (res < 0) { + return 1; + } + return 2; +} + +static uint64_t cc_calc_muls_64(int64_t res_high, uint64_t res_low) +{ + if (!res_high && !res_low) { + return 0; + } else if (res_high + (res_low >> 63) != 0) { + return 3; + } else if (res_high < 0) { + return 1; + } + return 2; +} + +static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op, + uint64_t src, uint64_t dst, uint64_t vr) +{ + uint32_t r = 0; + + switch (cc_op) { + case CC_OP_CONST0: + case CC_OP_CONST1: + case CC_OP_CONST2: + case CC_OP_CONST3: + /* cc_op value _is_ cc */ + r = cc_op; + break; + case CC_OP_LTGT0_32: + r = cc_calc_ltgt0_32(dst); + break; + case CC_OP_LTGT0_64: + r = cc_calc_ltgt0_64(dst); + break; + case CC_OP_LTGT_32: + r = cc_calc_ltgt_32(src, dst); + break; + case CC_OP_LTGT_64: + r = cc_calc_ltgt_64(src, dst); + break; + case CC_OP_LTUGTU_32: + r = cc_calc_ltugtu_32(src, dst); + break; + case CC_OP_LTUGTU_64: + r = cc_calc_ltugtu_64(src, dst); + break; + case CC_OP_TM_32: + r = cc_calc_tm_32(src, dst); + break; + case CC_OP_TM_64: + r = cc_calc_tm_64(src, dst); + break; + case CC_OP_NZ: + r = cc_calc_nz(dst); + break; + case CC_OP_ADDU: + r = cc_calc_addu(src, dst); + break; + case CC_OP_SUBU: + r = cc_calc_subu(src, dst); + break; + case CC_OP_ADD_64: + r = cc_calc_add_64(src, dst, vr); + break; + case CC_OP_SUB_64: + r = cc_calc_sub_64(src, dst, vr); + break; + case CC_OP_ABS_64: + r = cc_calc_abs_64(dst); + break; + case CC_OP_NABS_64: + r = cc_calc_nabs_64(dst); + break; + case CC_OP_COMP_64: + r = cc_calc_comp_64(dst); + break; + case CC_OP_MULS_64: + r = cc_calc_muls_64(src, dst); + break; + + case CC_OP_ADD_32: + r = cc_calc_add_32(src, dst, vr); + break; + case CC_OP_SUB_32: + r = cc_calc_sub_32(src, dst, vr); + break; + case CC_OP_ABS_32: + r = cc_calc_abs_32(dst); + break; + case CC_OP_NABS_32: + r = cc_calc_nabs_32(dst); + break; + case CC_OP_COMP_32: + r = cc_calc_comp_32(dst); + break; + case CC_OP_MULS_32: + r = cc_calc_muls_32(dst); + break; + + case CC_OP_ICM: + r = cc_calc_icm(src, dst); + break; + case CC_OP_SLA_32: + r = cc_calc_sla_32(src, dst); + break; + case CC_OP_SLA_64: + r = cc_calc_sla_64(src, dst); + break; + case CC_OP_FLOGR: + r = cc_calc_flogr(dst); + break; + case CC_OP_LCBB: + r = cc_calc_lcbb(dst); + break; + case CC_OP_VC: + r = cc_calc_vc(src, dst); + break; + + case CC_OP_NZ_F32: + r = set_cc_nz_f32(dst); + break; + case CC_OP_NZ_F64: + r = set_cc_nz_f64(dst); + break; + case CC_OP_NZ_F128: + r = set_cc_nz_f128(make_float128(src, dst)); + break; + + default: + cpu_abort(env_cpu(env), "Unknown CC operation: %s\n", cc_name(cc_op)); + } + + HELPER_LOG("%s: %15s 0x%016lx 0x%016lx 0x%016lx = %d\n", __func__, + cc_name(cc_op), src, dst, vr, r); + return r; +} + +uint32_t calc_cc(CPUS390XState *env, uint32_t cc_op, uint64_t src, uint64_t dst, + uint64_t vr) +{ + return do_calc_cc(env, cc_op, src, dst, vr); +} + +uint32_t HELPER(calc_cc)(CPUS390XState *env, uint32_t cc_op, uint64_t src, + uint64_t dst, uint64_t vr) +{ + return do_calc_cc(env, cc_op, src, dst, vr); +} + +#ifndef CONFIG_USER_ONLY +void 
HELPER(load_psw)(CPUS390XState *env, uint64_t mask, uint64_t addr) +{ + s390_cpu_set_psw(env, mask, addr); + cpu_loop_exit(env_cpu(env)); +} + +void HELPER(sacf)(CPUS390XState *env, uint64_t a1) +{ + HELPER_LOG("%s: %16" PRIx64 "\n", __func__, a1); + + switch (a1 & 0xf00) { + case 0x000: + env->psw.mask &= ~PSW_MASK_ASC; + env->psw.mask |= PSW_ASC_PRIMARY; + break; + case 0x100: + env->psw.mask &= ~PSW_MASK_ASC; + env->psw.mask |= PSW_ASC_SECONDARY; + break; + case 0x300: + env->psw.mask &= ~PSW_MASK_ASC; + env->psw.mask |= PSW_ASC_HOME; + break; + default: + HELPER_LOG("unknown sacf mode: %" PRIx64 "\n", a1); + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC()); + } +} +#endif diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c new file mode 100644 index 0000000000..138d9e7ad9 --- /dev/null +++ b/target/s390x/tcg/crypto_helper.c @@ -0,0 +1,61 @@ +/* + * s390x crypto helpers + * + * Copyright (c) 2017 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "s390x-internal.h" +#include "tcg_s390x.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" + +uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t r3, + uint32_t type) +{ + const uintptr_t ra = GETPC(); + const uint8_t mod = env->regs[0] & 0x80ULL; + const uint8_t fc = env->regs[0] & 0x7fULL; + uint8_t subfunc[16] = { 0 }; + uint64_t param_addr; + int i; + + switch (type) { + case S390_FEAT_TYPE_KMAC: + case S390_FEAT_TYPE_KIMD: + case S390_FEAT_TYPE_KLMD: + case S390_FEAT_TYPE_PCKMO: + case S390_FEAT_TYPE_PCC: + if (mod) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + break; + } + + s390_get_feat_block(type, subfunc); + if (!test_be_bit(fc, subfunc)) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + switch (fc) { + case 0: /* query subfunction */ + for (i = 0; i < 16; i++) { + param_addr = wrap_address(env, env->regs[1] + i); + cpu_stb_data_ra(env, param_addr, subfunc[i], ra); + } + break; + default: + /* we don't implement any other subfunction yet */ + g_assert_not_reached(); + } + + return 0; +} diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c new file mode 100644 index 0000000000..a61917d04f --- /dev/null +++ b/target/s390x/tcg/excp_helper.c @@ -0,0 +1,641 @@ +/* + * s390x exception / interrupt helpers + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2011 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "exec/helper-proto.h" +#include "qemu/timer.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "hw/s390x/ioinst.h" +#include "exec/address-spaces.h" +#include "tcg_s390x.h" +#ifndef CONFIG_USER_ONLY +#include "hw/s390x/s390_flic.h" +#include "hw/boards.h" +#endif + +void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, + uint32_t code, uintptr_t ra) +{ + CPUState *cs = env_cpu(env); + + cpu_restore_state(cs, ra, true); + qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n", + env->psw.addr); + trigger_pgm_exception(env, code); + cpu_loop_exit(cs); +} + +void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc, + uintptr_t ra) +{ + g_assert(dxc <= 0xff); +#if !defined(CONFIG_USER_ONLY) + /* Store the DXC into the lowcore */ + stl_phys(env_cpu(env)->as, + env->psa + offsetof(LowCore, data_exc_code), dxc); +#endif + + /* Store the DXC into the FPC if AFP is enabled */ + if (env->cregs[0] & CR0_AFP) { + env->fpc = deposit32(env->fpc, 8, 8, dxc); + } + tcg_s390_program_interrupt(env, PGM_DATA, ra); +} + +void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc, + uintptr_t ra) +{ + g_assert(vxc <= 0xff); +#if !defined(CONFIG_USER_ONLY) + /* Always store the VXC into the lowcore, without AFP it is undefined */ + stl_phys(env_cpu(env)->as, + env->psa + offsetof(LowCore, data_exc_code), vxc); +#endif + + /* Always store the VXC into the FPC, without AFP it is undefined */ + env->fpc = deposit32(env->fpc, 8, 8, vxc); + tcg_s390_program_interrupt(env, PGM_VECTOR_PROCESSING, ra); +} + +void HELPER(data_exception)(CPUS390XState *env, uint32_t dxc) +{ + tcg_s390_data_exception(env, dxc, GETPC()); +} + +#if defined(CONFIG_USER_ONLY) + +void s390_cpu_do_interrupt(CPUState *cs) +{ + cs->exception_index = -1; +} + +bool s390_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr) +{ + S390CPU *cpu = S390_CPU(cs); + + trigger_pgm_exception(&cpu->env, PGM_ADDRESSING); + /* On real machines this value is dropped into LowMem. Since this + is userland, simply put this someplace that cpu_loop can find it. 
*/ + cpu->env.__excp_addr = address; + cpu_loop_exit_restore(cs, retaddr); +} + +#else /* !CONFIG_USER_ONLY */ + +static inline uint64_t cpu_mmu_idx_to_asc(int mmu_idx) +{ + switch (mmu_idx) { + case MMU_PRIMARY_IDX: + return PSW_ASC_PRIMARY; + case MMU_SECONDARY_IDX: + return PSW_ASC_SECONDARY; + case MMU_HOME_IDX: + return PSW_ASC_HOME; + default: + abort(); + } +} + +bool s390_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr) +{ + S390CPU *cpu = S390_CPU(cs); + CPUS390XState *env = &cpu->env; + target_ulong vaddr, raddr; + uint64_t asc, tec; + int prot, excp; + + qemu_log_mask(CPU_LOG_MMU, "%s: addr 0x%" VADDR_PRIx " rw %d mmu_idx %d\n", + __func__, address, access_type, mmu_idx); + + vaddr = address; + + if (mmu_idx < MMU_REAL_IDX) { + asc = cpu_mmu_idx_to_asc(mmu_idx); + /* 31-Bit mode */ + if (!(env->psw.mask & PSW_MASK_64)) { + vaddr &= 0x7fffffff; + } + excp = mmu_translate(env, vaddr, access_type, asc, &raddr, &prot, &tec); + } else if (mmu_idx == MMU_REAL_IDX) { + /* 31-Bit mode */ + if (!(env->psw.mask & PSW_MASK_64)) { + vaddr &= 0x7fffffff; + } + excp = mmu_translate_real(env, vaddr, access_type, &raddr, &prot, &tec); + } else { + g_assert_not_reached(); + } + + /* check out of RAM access */ + if (!excp && + !address_space_access_valid(&address_space_memory, raddr, + TARGET_PAGE_SIZE, access_type, + MEMTXATTRS_UNSPECIFIED)) { + MachineState *ms = MACHINE(qdev_get_machine()); + qemu_log_mask(CPU_LOG_MMU, + "%s: raddr %" PRIx64 " > ram_size %" PRIx64 "\n", + __func__, (uint64_t)raddr, (uint64_t)ms->ram_size); + excp = PGM_ADDRESSING; + tec = 0; /* unused */ + } + + env->tlb_fill_exc = excp; + env->tlb_fill_tec = tec; + + if (!excp) { + qemu_log_mask(CPU_LOG_MMU, + "%s: set tlb %" PRIx64 " -> %" PRIx64 " (%x)\n", + __func__, (uint64_t)vaddr, (uint64_t)raddr, prot); + tlb_set_page(cs, address & TARGET_PAGE_MASK, raddr, prot, + mmu_idx, TARGET_PAGE_SIZE); + return true; + } + if (probe) { + return false; + } + + if (excp != PGM_ADDRESSING) { + stq_phys(env_cpu(env)->as, + env->psa + offsetof(LowCore, trans_exc_code), tec); + } + + /* + * For data accesses, ILEN will be filled in from the unwind info, + * within cpu_loop_exit_restore. For code accesses, retaddr == 0, + * and so unwinding will not occur. However, ILEN is also undefined + * for that case -- we choose to set ILEN = 2. 
+ */ + env->int_pgm_ilen = 2; + trigger_pgm_exception(env, excp); + cpu_loop_exit_restore(cs, retaddr); +} + +static void do_program_interrupt(CPUS390XState *env) +{ + uint64_t mask, addr; + LowCore *lowcore; + int ilen = env->int_pgm_ilen; + + assert(ilen == 2 || ilen == 4 || ilen == 6); + + switch (env->int_pgm_code) { + case PGM_PER: + if (env->per_perc_atmid & PER_CODE_EVENT_NULLIFICATION) { + break; + } + /* FALL THROUGH */ + case PGM_OPERATION: + case PGM_PRIVILEGED: + case PGM_EXECUTE: + case PGM_PROTECTION: + case PGM_ADDRESSING: + case PGM_SPECIFICATION: + case PGM_DATA: + case PGM_FIXPT_OVERFLOW: + case PGM_FIXPT_DIVIDE: + case PGM_DEC_OVERFLOW: + case PGM_DEC_DIVIDE: + case PGM_HFP_EXP_OVERFLOW: + case PGM_HFP_EXP_UNDERFLOW: + case PGM_HFP_SIGNIFICANCE: + case PGM_HFP_DIVIDE: + case PGM_TRANS_SPEC: + case PGM_SPECIAL_OP: + case PGM_OPERAND: + case PGM_HFP_SQRT: + case PGM_PC_TRANS_SPEC: + case PGM_ALET_SPEC: + case PGM_MONITOR: + /* advance the PSW if our exception is not nullifying */ + env->psw.addr += ilen; + break; + } + + qemu_log_mask(CPU_LOG_INT, + "%s: code=0x%x ilen=%d psw: %" PRIx64 " %" PRIx64 "\n", + __func__, env->int_pgm_code, ilen, env->psw.mask, + env->psw.addr); + + lowcore = cpu_map_lowcore(env); + + /* Signal PER events with the exception. */ + if (env->per_perc_atmid) { + env->int_pgm_code |= PGM_PER; + lowcore->per_address = cpu_to_be64(env->per_address); + lowcore->per_perc_atmid = cpu_to_be16(env->per_perc_atmid); + env->per_perc_atmid = 0; + } + + lowcore->pgm_ilen = cpu_to_be16(ilen); + lowcore->pgm_code = cpu_to_be16(env->int_pgm_code); + lowcore->program_old_psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(env)); + lowcore->program_old_psw.addr = cpu_to_be64(env->psw.addr); + mask = be64_to_cpu(lowcore->program_new_psw.mask); + addr = be64_to_cpu(lowcore->program_new_psw.addr); + lowcore->per_breaking_event_addr = cpu_to_be64(env->gbea); + + cpu_unmap_lowcore(lowcore); + + s390_cpu_set_psw(env, mask, addr); +} + +static void do_svc_interrupt(CPUS390XState *env) +{ + uint64_t mask, addr; + LowCore *lowcore; + + lowcore = cpu_map_lowcore(env); + + lowcore->svc_code = cpu_to_be16(env->int_svc_code); + lowcore->svc_ilen = cpu_to_be16(env->int_svc_ilen); + lowcore->svc_old_psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(env)); + lowcore->svc_old_psw.addr = cpu_to_be64(env->psw.addr + env->int_svc_ilen); + mask = be64_to_cpu(lowcore->svc_new_psw.mask); + addr = be64_to_cpu(lowcore->svc_new_psw.addr); + + cpu_unmap_lowcore(lowcore); + + s390_cpu_set_psw(env, mask, addr); + + /* When a PER event is pending, the PER exception has to happen + immediately after the SERVICE CALL one. 
*/ + if (env->per_perc_atmid) { + env->int_pgm_code = PGM_PER; + env->int_pgm_ilen = env->int_svc_ilen; + do_program_interrupt(env); + } +} + +#define VIRTIO_SUBCODE_64 0x0D00 + +static void do_ext_interrupt(CPUS390XState *env) +{ + QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic()); + S390CPU *cpu = env_archcpu(env); + uint64_t mask, addr; + uint16_t cpu_addr; + LowCore *lowcore; + + if (!(env->psw.mask & PSW_MASK_EXT)) { + cpu_abort(CPU(cpu), "Ext int w/o ext mask\n"); + } + + lowcore = cpu_map_lowcore(env); + + if ((env->pending_int & INTERRUPT_EMERGENCY_SIGNAL) && + (env->cregs[0] & CR0_EMERGENCY_SIGNAL_SC)) { + MachineState *ms = MACHINE(qdev_get_machine()); + unsigned int max_cpus = ms->smp.max_cpus; + + lowcore->ext_int_code = cpu_to_be16(EXT_EMERGENCY); + cpu_addr = find_first_bit(env->emergency_signals, S390_MAX_CPUS); + g_assert(cpu_addr < S390_MAX_CPUS); + lowcore->cpu_addr = cpu_to_be16(cpu_addr); + clear_bit(cpu_addr, env->emergency_signals); + if (bitmap_empty(env->emergency_signals, max_cpus)) { + env->pending_int &= ~INTERRUPT_EMERGENCY_SIGNAL; + } + } else if ((env->pending_int & INTERRUPT_EXTERNAL_CALL) && + (env->cregs[0] & CR0_EXTERNAL_CALL_SC)) { + lowcore->ext_int_code = cpu_to_be16(EXT_EXTERNAL_CALL); + lowcore->cpu_addr = cpu_to_be16(env->external_call_addr); + env->pending_int &= ~INTERRUPT_EXTERNAL_CALL; + } else if ((env->pending_int & INTERRUPT_EXT_CLOCK_COMPARATOR) && + (env->cregs[0] & CR0_CKC_SC)) { + lowcore->ext_int_code = cpu_to_be16(EXT_CLOCK_COMP); + lowcore->cpu_addr = 0; + env->pending_int &= ~INTERRUPT_EXT_CLOCK_COMPARATOR; + } else if ((env->pending_int & INTERRUPT_EXT_CPU_TIMER) && + (env->cregs[0] & CR0_CPU_TIMER_SC)) { + lowcore->ext_int_code = cpu_to_be16(EXT_CPU_TIMER); + lowcore->cpu_addr = 0; + env->pending_int &= ~INTERRUPT_EXT_CPU_TIMER; + } else if (qemu_s390_flic_has_service(flic) && + (env->cregs[0] & CR0_SERVICE_SC)) { + uint32_t param; + + param = qemu_s390_flic_dequeue_service(flic); + lowcore->ext_int_code = cpu_to_be16(EXT_SERVICE); + lowcore->ext_params = cpu_to_be32(param); + lowcore->cpu_addr = 0; + } else { + g_assert_not_reached(); + } + + mask = be64_to_cpu(lowcore->external_new_psw.mask); + addr = be64_to_cpu(lowcore->external_new_psw.addr); + lowcore->external_old_psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(env)); + lowcore->external_old_psw.addr = cpu_to_be64(env->psw.addr); + + cpu_unmap_lowcore(lowcore); + + s390_cpu_set_psw(env, mask, addr); +} + +static void do_io_interrupt(CPUS390XState *env) +{ + QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic()); + uint64_t mask, addr; + QEMUS390FlicIO *io; + LowCore *lowcore; + + g_assert(env->psw.mask & PSW_MASK_IO); + io = qemu_s390_flic_dequeue_io(flic, env->cregs[6]); + g_assert(io); + + lowcore = cpu_map_lowcore(env); + + lowcore->subchannel_id = cpu_to_be16(io->id); + lowcore->subchannel_nr = cpu_to_be16(io->nr); + lowcore->io_int_parm = cpu_to_be32(io->parm); + lowcore->io_int_word = cpu_to_be32(io->word); + lowcore->io_old_psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(env)); + lowcore->io_old_psw.addr = cpu_to_be64(env->psw.addr); + mask = be64_to_cpu(lowcore->io_new_psw.mask); + addr = be64_to_cpu(lowcore->io_new_psw.addr); + + cpu_unmap_lowcore(lowcore); + g_free(io); + + s390_cpu_set_psw(env, mask, addr); +} + +typedef struct MchkExtSaveArea { + uint64_t vregs[32][2]; /* 0x0000 */ + uint8_t pad_0x0200[0x0400 - 0x0200]; /* 0x0200 */ +} MchkExtSaveArea; +QEMU_BUILD_BUG_ON(sizeof(MchkExtSaveArea) != 1024); + +static int mchk_store_vregs(CPUS390XState *env, 
uint64_t mcesao) +{ + hwaddr len = sizeof(MchkExtSaveArea); + MchkExtSaveArea *sa; + int i; + + sa = cpu_physical_memory_map(mcesao, &len, true); + if (!sa) { + return -EFAULT; + } + if (len != sizeof(MchkExtSaveArea)) { + cpu_physical_memory_unmap(sa, len, 1, 0); + return -EFAULT; + } + + for (i = 0; i < 32; i++) { + sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0]); + sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1]); + } + + cpu_physical_memory_unmap(sa, len, 1, len); + return 0; +} + +static void do_mchk_interrupt(CPUS390XState *env) +{ + QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic()); + uint64_t mcic = s390_build_validity_mcic() | MCIC_SC_CP; + uint64_t mask, addr, mcesao = 0; + LowCore *lowcore; + int i; + + /* for now we only support channel report machine checks (floating) */ + g_assert(env->psw.mask & PSW_MASK_MCHECK); + g_assert(env->cregs[14] & CR14_CHANNEL_REPORT_SC); + + qemu_s390_flic_dequeue_crw_mchk(flic); + + lowcore = cpu_map_lowcore(env); + + /* extended save area */ + if (mcic & MCIC_VB_VR) { + /* length and alignment is 1024 bytes */ + mcesao = be64_to_cpu(lowcore->mcesad) & ~0x3ffull; + } + + /* try to store vector registers */ + if (!mcesao || mchk_store_vregs(env, mcesao)) { + mcic &= ~MCIC_VB_VR; + } + + /* we are always in z/Architecture mode */ + lowcore->ar_access_id = 1; + + for (i = 0; i < 16; i++) { + lowcore->floating_pt_save_area[i] = cpu_to_be64(*get_freg(env, i)); + lowcore->gpregs_save_area[i] = cpu_to_be64(env->regs[i]); + lowcore->access_regs_save_area[i] = cpu_to_be32(env->aregs[i]); + lowcore->cregs_save_area[i] = cpu_to_be64(env->cregs[i]); + } + lowcore->prefixreg_save_area = cpu_to_be32(env->psa); + lowcore->fpt_creg_save_area = cpu_to_be32(env->fpc); + lowcore->tod_progreg_save_area = cpu_to_be32(env->todpr); + lowcore->cpu_timer_save_area = cpu_to_be64(env->cputm); + lowcore->clock_comp_save_area = cpu_to_be64(env->ckc >> 8); + + lowcore->mcic = cpu_to_be64(mcic); + lowcore->mcck_old_psw.mask = cpu_to_be64(s390_cpu_get_psw_mask(env)); + lowcore->mcck_old_psw.addr = cpu_to_be64(env->psw.addr); + mask = be64_to_cpu(lowcore->mcck_new_psw.mask); + addr = be64_to_cpu(lowcore->mcck_new_psw.addr); + + cpu_unmap_lowcore(lowcore); + + s390_cpu_set_psw(env, mask, addr); +} + +void s390_cpu_do_interrupt(CPUState *cs) +{ + QEMUS390FLICState *flic = QEMU_S390_FLIC(s390_get_flic()); + S390CPU *cpu = S390_CPU(cs); + CPUS390XState *env = &cpu->env; + bool stopped = false; + + qemu_log_mask(CPU_LOG_INT, "%s: %d at psw=%" PRIx64 ":%" PRIx64 "\n", + __func__, cs->exception_index, env->psw.mask, env->psw.addr); + +try_deliver: + /* handle machine checks */ + if (cs->exception_index == -1 && s390_cpu_has_mcck_int(cpu)) { + cs->exception_index = EXCP_MCHK; + } + /* handle external interrupts */ + if (cs->exception_index == -1 && s390_cpu_has_ext_int(cpu)) { + cs->exception_index = EXCP_EXT; + } + /* handle I/O interrupts */ + if (cs->exception_index == -1 && s390_cpu_has_io_int(cpu)) { + cs->exception_index = EXCP_IO; + } + /* RESTART interrupt */ + if (cs->exception_index == -1 && s390_cpu_has_restart_int(cpu)) { + cs->exception_index = EXCP_RESTART; + } + /* STOP interrupt has least priority */ + if (cs->exception_index == -1 && s390_cpu_has_stop_int(cpu)) { + cs->exception_index = EXCP_STOP; + } + + switch (cs->exception_index) { + case EXCP_PGM: + do_program_interrupt(env); + break; + case EXCP_SVC: + do_svc_interrupt(env); + break; + case EXCP_EXT: + do_ext_interrupt(env); + break; + case EXCP_IO: + do_io_interrupt(env); + break; + case EXCP_MCHK: + 
do_mchk_interrupt(env); + break; + case EXCP_RESTART: + do_restart_interrupt(env); + break; + case EXCP_STOP: + do_stop_interrupt(env); + stopped = true; + break; + } + + if (cs->exception_index != -1 && !stopped) { + /* check if there are more pending interrupts to deliver */ + cs->exception_index = -1; + goto try_deliver; + } + cs->exception_index = -1; + + /* we might still have pending interrupts, but not deliverable */ + if (!env->pending_int && !qemu_s390_flic_has_any(flic)) { + cs->interrupt_request &= ~CPU_INTERRUPT_HARD; + } + + /* WAIT PSW during interrupt injection or STOP interrupt */ + if ((env->psw.mask & PSW_MASK_WAIT) || stopped) { + /* don't trigger a cpu_loop_exit(), use an interrupt instead */ + cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HALT); + } else if (cs->halted) { + /* unhalt if we had a WAIT PSW somewhere in our injection chain */ + s390_cpu_unhalt(cpu); + } +} + +bool s390_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + if (interrupt_request & CPU_INTERRUPT_HARD) { + S390CPU *cpu = S390_CPU(cs); + CPUS390XState *env = &cpu->env; + + if (env->ex_value) { + /* Execution of the target insn is indivisible from + the parent EXECUTE insn. */ + return false; + } + if (s390_cpu_has_int(cpu)) { + s390_cpu_do_interrupt(cs); + return true; + } + if (env->psw.mask & PSW_MASK_WAIT) { + /* Woken up because of a floating interrupt but it has already + * been delivered. Go back to sleep. */ + cpu_interrupt(CPU(cpu), CPU_INTERRUPT_HALT); + } + } + return false; +} + +void s390x_cpu_debug_excp_handler(CPUState *cs) +{ + S390CPU *cpu = S390_CPU(cs); + CPUS390XState *env = &cpu->env; + CPUWatchpoint *wp_hit = cs->watchpoint_hit; + + if (wp_hit && wp_hit->flags & BP_CPU) { + /* FIXME: When the storage-alteration-space control bit is set, + the exception should only be triggered if the memory access + is done using an address space with the storage-alteration-event + bit set. We have no way to detect that with the current + watchpoint code. */ + cs->watchpoint_hit = NULL; + + env->per_address = env->psw.addr; + env->per_perc_atmid |= PER_CODE_EVENT_STORE | get_per_atmid(env); + /* FIXME: We currently have no way to detect the address space used + to trigger the watchpoint. For now just consider it is the + current default ASC. This turns out to be true except when the MVCP + and MVCS instructions are used. */ + env->per_perc_atmid |= env->psw.mask & (PSW_MASK_ASC) >> 46; + + /* + * Remove all watchpoints to re-execute the code. A PER exception + * will be triggered, it will call s390_cpu_set_psw which will + * recompute the watchpoints. + */ + cpu_watchpoint_remove_all(cs, BP_CPU); + cpu_loop_exit_noexc(cs); + } +} + +/* Unaligned accesses are only diagnosed with MO_ALIGN. At the moment, + this is only for the atomic operations, for which we want to raise a + specification exception.
*/ +void s390x_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + S390CPU *cpu = S390_CPU(cs); + CPUS390XState *env = &cpu->env; + + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, retaddr); +} + +static void QEMU_NORETURN monitor_event(CPUS390XState *env, + uint64_t monitor_code, + uint8_t monitor_class, uintptr_t ra) +{ + /* Store the Monitor Code and the Monitor Class Number into the lowcore */ + stq_phys(env_cpu(env)->as, + env->psa + offsetof(LowCore, monitor_code), monitor_code); + stw_phys(env_cpu(env)->as, + env->psa + offsetof(LowCore, mon_class_num), monitor_class); + + tcg_s390_program_interrupt(env, PGM_MONITOR, ra); +} + +void HELPER(monitor_call)(CPUS390XState *env, uint64_t monitor_code, + uint32_t monitor_class) +{ + g_assert(monitor_class <= 0xff); + + if (env->cregs[8] & (0x8000 >> monitor_class)) { + monitor_event(env, monitor_code, monitor_class, GETPC()); + } +} + +#endif /* !CONFIG_USER_ONLY */ diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c new file mode 100644 index 0000000000..4067205405 --- /dev/null +++ b/target/s390x/tcg/fpu_helper.c @@ -0,0 +1,976 @@ +/* + * S/390 FPU helper routines + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2009 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "tcg_s390x.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" +#include "fpu/softfloat.h" + +/* #define DEBUG_HELPER */ +#ifdef DEBUG_HELPER +#define HELPER_LOG(x...) qemu_log(x) +#else +#define HELPER_LOG(x...) +#endif + +#define RET128(F) (env->retxl = F.low, F.high) + +uint8_t s390_softfloat_exc_to_ieee(unsigned int exc) +{ + uint8_t s390_exc = 0; + + s390_exc |= (exc & float_flag_invalid) ? S390_IEEE_MASK_INVALID : 0; + s390_exc |= (exc & float_flag_divbyzero) ? S390_IEEE_MASK_DIVBYZERO : 0; + s390_exc |= (exc & float_flag_overflow) ? S390_IEEE_MASK_OVERFLOW : 0; + s390_exc |= (exc & float_flag_underflow) ? S390_IEEE_MASK_UNDERFLOW : 0; + s390_exc |= (exc & float_flag_inexact) ? S390_IEEE_MASK_INEXACT : 0; + + return s390_exc; +} + +/* Should be called after any operation that may raise IEEE exceptions. */ +static void handle_exceptions(CPUS390XState *env, bool XxC, uintptr_t retaddr) +{ + unsigned s390_exc, qemu_exc; + + /* Get the exceptions raised by the current operation. Reset the + fpu_status contents so that the next operation has a clean slate. 
*/ + qemu_exc = env->fpu_status.float_exception_flags; + if (qemu_exc == 0) { + return; + } + env->fpu_status.float_exception_flags = 0; + s390_exc = s390_softfloat_exc_to_ieee(qemu_exc); + + /* + * IEEE-Underflow exception recognition exists if a tininess condition + * (underflow) exists and + * - The mask bit in the FPC is zero and the result is inexact + * - The mask bit in the FPC is one + * So tininess conditions that are not inexact don't trigger any + * underflow action in case the mask bit is not one. + */ + if (!(s390_exc & S390_IEEE_MASK_INEXACT) && + !((env->fpc >> 24) & S390_IEEE_MASK_UNDERFLOW)) { + s390_exc &= ~S390_IEEE_MASK_UNDERFLOW; + } + + /* + * FIXME: + * 1. Right now, all inexact conditions are indicated as + * "truncated" (0) and never as "incremented" (1) in the DXC. + * 2. Only traps due to invalid/divbyzero are suppressing. Other traps + * are completing, meaning the target register has to be written! + * This, however, will mean that we have to write the register before + * triggering the trap - impossible right now. + */ + + /* + * invalid/divbyzero cannot coexist with other conditions. + * overflow/underflow however can coexist with inexact, we have to + * handle it separately. + */ + if (s390_exc & ~S390_IEEE_MASK_INEXACT) { + if (s390_exc & ~S390_IEEE_MASK_INEXACT & env->fpc >> 24) { + /* trap condition - inexact reported along */ + tcg_s390_data_exception(env, s390_exc, retaddr); + } + /* nontrap condition - inexact handled differently */ + env->fpc |= (s390_exc & ~S390_IEEE_MASK_INEXACT) << 16; + } + + /* inexact handling */ + if (s390_exc & S390_IEEE_MASK_INEXACT && !XxC) { + /* trap condition - overflow/underflow _not_ reported along */ + if (s390_exc & S390_IEEE_MASK_INEXACT & env->fpc >> 24) { + tcg_s390_data_exception(env, s390_exc & S390_IEEE_MASK_INEXACT, + retaddr); + } + /* nontrap condition */ + env->fpc |= (s390_exc & S390_IEEE_MASK_INEXACT) << 16; + } +} + +int float_comp_to_cc(CPUS390XState *env, FloatRelation float_compare) +{ + switch (float_compare) { + case float_relation_equal: + return 0; + case float_relation_less: + return 1; + case float_relation_greater: + return 2; + case float_relation_unordered: + return 3; + default: + cpu_abort(env_cpu(env), "unknown return value for float compare\n"); + } +} + +/* condition codes for unary FP ops */ +uint32_t set_cc_nz_f32(float32 v) +{ + if (float32_is_any_nan(v)) { + return 3; + } else if (float32_is_zero(v)) { + return 0; + } else if (float32_is_neg(v)) { + return 1; + } else { + return 2; + } +} + +uint32_t set_cc_nz_f64(float64 v) +{ + if (float64_is_any_nan(v)) { + return 3; + } else if (float64_is_zero(v)) { + return 0; + } else if (float64_is_neg(v)) { + return 1; + } else { + return 2; + } +} + +uint32_t set_cc_nz_f128(float128 v) +{ + if (float128_is_any_nan(v)) { + return 3; + } else if (float128_is_zero(v)) { + return 0; + } else if (float128_is_neg(v)) { + return 1; + } else { + return 2; + } +} + +/* condition codes for FP to integer conversion ops */ +static uint32_t set_cc_conv_f32(float32 v, float_status *stat) +{ + if (stat->float_exception_flags & float_flag_invalid) { + return 3; + } else { + return set_cc_nz_f32(v); + } +} + +static uint32_t set_cc_conv_f64(float64 v, float_status *stat) +{ + if (stat->float_exception_flags & float_flag_invalid) { + return 3; + } else { + return set_cc_nz_f64(v); + } +} + +static uint32_t set_cc_conv_f128(float128 v, float_status *stat) +{ + if (stat->float_exception_flags & float_flag_invalid) { + return 3; + } else { + return
set_cc_nz_f128(v); + } +} + +static inline uint8_t round_from_m34(uint32_t m34) +{ + return extract32(m34, 0, 4); +} + +static inline bool xxc_from_m34(uint32_t m34) +{ + /* XxC is bit 1 of m4 */ + return extract32(m34, 4 + 3 - 1, 1); +} + +/* 32-bit FP addition */ +uint64_t HELPER(aeb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float32 ret = float32_add(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64-bit FP addition */ +uint64_t HELPER(adb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float64 ret = float64_add(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 128-bit FP addition */ +uint64_t HELPER(axb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t bh, uint64_t bl) +{ + float128 ret = float128_add(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* 32-bit FP subtraction */ +uint64_t HELPER(seb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float32 ret = float32_sub(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64-bit FP subtraction */ +uint64_t HELPER(sdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float64 ret = float64_sub(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 128-bit FP subtraction */ +uint64_t HELPER(sxb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t bh, uint64_t bl) +{ + float128 ret = float128_sub(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* 32-bit FP division */ +uint64_t HELPER(deb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float32 ret = float32_div(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64-bit FP division */ +uint64_t HELPER(ddb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float64 ret = float64_div(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 128-bit FP division */ +uint64_t HELPER(dxb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t bh, uint64_t bl) +{ + float128 ret = float128_div(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* 32-bit FP multiplication */ +uint64_t HELPER(meeb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float32 ret = float32_mul(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64-bit FP multiplication */ +uint64_t HELPER(mdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float64 ret = float64_mul(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64/32-bit FP multiplication */ +uint64_t HELPER(mdeb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + float64 ret = float32_to_float64(f2, &env->fpu_status); + ret = float64_mul(f1, ret, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 128-bit FP multiplication */ +uint64_t HELPER(mxb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t bh, uint64_t bl) +{ + float128 ret = float128_mul(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* 128/64-bit FP multiplication */ +uint64_t HELPER(mxdb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t f2) +{ + 
float128 ret = float64_to_float128(f2, &env->fpu_status); + ret = float128_mul(make_float128(ah, al), ret, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* convert 32-bit float to 64-bit float */ +uint64_t HELPER(ldeb)(CPUS390XState *env, uint64_t f2) +{ + float64 ret = float32_to_float64(f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* convert 128-bit float to 64-bit float */ +uint64_t HELPER(ldxb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float64 ret = float128_to_float64(make_float128(ah, al), &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* convert 64-bit float to 128-bit float */ +uint64_t HELPER(lxdb)(CPUS390XState *env, uint64_t f2) +{ + float128 ret = float64_to_float128(f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* convert 32-bit float to 128-bit float */ +uint64_t HELPER(lxeb)(CPUS390XState *env, uint64_t f2) +{ + float128 ret = float32_to_float128(f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +/* convert 64-bit float to 32-bit float */ +uint64_t HELPER(ledb)(CPUS390XState *env, uint64_t f2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float32 ret = float64_to_float32(f2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* convert 128-bit float to 32-bit float */ +uint64_t HELPER(lexb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float32 ret = float128_to_float32(make_float128(ah, al), &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* 32-bit FP compare */ +uint32_t HELPER(ceb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + FloatRelation cmp = float32_compare_quiet(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return float_comp_to_cc(env, cmp); +} + +/* 64-bit FP compare */ +uint32_t HELPER(cdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + FloatRelation cmp = float64_compare_quiet(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return float_comp_to_cc(env, cmp); +} + +/* 128-bit FP compare */ +uint32_t HELPER(cxb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t bh, uint64_t bl) +{ + FloatRelation cmp = float128_compare_quiet(make_float128(ah, al), + make_float128(bh, bl), + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return float_comp_to_cc(env, cmp); +} + +int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3) +{ + int ret = env->fpu_status.float_rounding_mode; + + switch (m3) { + case 0: + /* current mode */ + break; + case 1: + /* round to nearest with ties away from 0 */ + set_float_rounding_mode(float_round_ties_away, &env->fpu_status); + break; + case 3: + /* round to prepare for shorter precision */ + set_float_rounding_mode(float_round_to_odd, &env->fpu_status); + break; + case 4: + /* round to nearest with ties to even */ + set_float_rounding_mode(float_round_nearest_even, &env->fpu_status); + break; + case 5: + /* round to zero */ + set_float_rounding_mode(float_round_to_zero, 
&env->fpu_status); + break; + case 6: + /* round to +inf */ + set_float_rounding_mode(float_round_up, &env->fpu_status); + break; + case 7: + /* round to -inf */ + set_float_rounding_mode(float_round_down, &env->fpu_status); + break; + default: + g_assert_not_reached(); + } + return ret; +} + +void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode) +{ + set_float_rounding_mode(old_mode, &env->fpu_status); +} + +/* convert 64-bit int to 32-bit float */ +uint64_t HELPER(cegb)(CPUS390XState *env, int64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float32 ret = int64_to_float32(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* convert 64-bit int to 64-bit float */ +uint64_t HELPER(cdgb)(CPUS390XState *env, int64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float64 ret = int64_to_float64(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* convert 64-bit int to 128-bit float */ +uint64_t HELPER(cxgb)(CPUS390XState *env, int64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 ret = int64_to_float128(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return RET128(ret); +} + +/* convert 64-bit uint to 32-bit float */ +uint64_t HELPER(celgb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float32 ret = uint64_to_float32(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* convert 64-bit uint to 64-bit float */ +uint64_t HELPER(cdlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float64 ret = uint64_to_float64(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* convert 64-bit uint to 128-bit float */ +uint64_t HELPER(cxlgb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 ret = uint64_to_float128(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return RET128(ret); +} + +/* convert 32-bit float to 64-bit int */ +uint64_t HELPER(cgeb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + int64_t ret = float32_to_int64(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f32(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float32_is_any_nan(v2)) { + return INT64_MIN; + } + return ret; +} + +/* convert 64-bit float to 64-bit int */ +uint64_t HELPER(cgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + int64_t ret = float64_to_int64(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f64(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), 
GETPC()); + env->cc_op = cc; + if (float64_is_any_nan(v2)) { + return INT64_MIN; + } + return ret; +} + +/* convert 128-bit float to 64-bit int */ +uint64_t HELPER(cgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 v2 = make_float128(h, l); + int64_t ret = float128_to_int64(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float128_is_any_nan(v2)) { + return INT64_MIN; + } + return ret; +} + +/* convert 32-bit float to 32-bit int */ +uint64_t HELPER(cfeb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + int32_t ret = float32_to_int32(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f32(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float32_is_any_nan(v2)) { + return INT32_MIN; + } + return ret; +} + +/* convert 64-bit float to 32-bit int */ +uint64_t HELPER(cfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + int32_t ret = float64_to_int32(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f64(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float64_is_any_nan(v2)) { + return INT32_MIN; + } + return ret; +} + +/* convert 128-bit float to 32-bit int */ +uint64_t HELPER(cfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 v2 = make_float128(h, l); + int32_t ret = float128_to_int32(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float128_is_any_nan(v2)) { + return INT32_MIN; + } + return ret; +} + +/* convert 32-bit float to 64-bit uint */ +uint64_t HELPER(clgeb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + uint64_t ret = float32_to_uint64(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f32(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float32_is_any_nan(v2)) { + return 0; + } + return ret; +} + +/* convert 64-bit float to 64-bit uint */ +uint64_t HELPER(clgdb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + uint64_t ret = float64_to_uint64(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f64(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float64_is_any_nan(v2)) { + return 0; + } + return ret; +} + +/* convert 128-bit float to 64-bit uint */ +uint64_t HELPER(clgxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 v2 = make_float128(h, l); + uint64_t ret = float128_to_uint64(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status); + + 
s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float128_is_any_nan(v2)) { + return 0; + } + return ret; +} + +/* convert 32-bit float to 32-bit uint */ +uint64_t HELPER(clfeb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + uint32_t ret = float32_to_uint32(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f32(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float32_is_any_nan(v2)) { + return 0; + } + return ret; +} + +/* convert 64-bit float to 32-bit uint */ +uint64_t HELPER(clfdb)(CPUS390XState *env, uint64_t v2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + uint32_t ret = float64_to_uint32(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f64(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float64_is_any_nan(v2)) { + return 0; + } + return ret; +} + +/* convert 128-bit float to 32-bit uint */ +uint64_t HELPER(clfxb)(CPUS390XState *env, uint64_t h, uint64_t l, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 v2 = make_float128(h, l); + uint32_t ret = float128_to_uint32(v2, &env->fpu_status); + uint32_t cc = set_cc_conv_f128(v2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + env->cc_op = cc; + if (float128_is_any_nan(v2)) { + return 0; + } + return ret; +} + +/* round to integer 32-bit */ +uint64_t HELPER(fieb)(CPUS390XState *env, uint64_t f2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float32 ret = float32_round_to_int(f2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* round to integer 64-bit */ +uint64_t HELPER(fidb)(CPUS390XState *env, uint64_t f2, uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float64 ret = float64_round_to_int(f2, &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return ret; +} + +/* round to integer 128-bit */ +uint64_t HELPER(fixb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint32_t m34) +{ + int old_mode = s390_swap_bfp_rounding_mode(env, round_from_m34(m34)); + float128 ret = float128_round_to_int(make_float128(ah, al), + &env->fpu_status); + + s390_restore_bfp_rounding_mode(env, old_mode); + handle_exceptions(env, xxc_from_m34(m34), GETPC()); + return RET128(ret); +} + +/* 32-bit FP compare and signal */ +uint32_t HELPER(keb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + FloatRelation cmp = float32_compare(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return float_comp_to_cc(env, cmp); +} + +/* 64-bit FP compare and signal */ +uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2) +{ + FloatRelation cmp = float64_compare(f1, f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return float_comp_to_cc(env, cmp); +} + +/* 128-bit FP compare and signal */ +uint32_t HELPER(kxb)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t bh, uint64_t bl) +{ + FloatRelation cmp = float128_compare(make_float128(ah, 
al), + make_float128(bh, bl), + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return float_comp_to_cc(env, cmp); +} + +/* 32-bit FP multiply and add */ +uint64_t HELPER(maeb)(CPUS390XState *env, uint64_t f1, + uint64_t f2, uint64_t f3) +{ + float32 ret = float32_muladd(f2, f3, f1, 0, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64-bit FP multiply and add */ +uint64_t HELPER(madb)(CPUS390XState *env, uint64_t f1, + uint64_t f2, uint64_t f3) +{ + float64 ret = float64_muladd(f2, f3, f1, 0, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 32-bit FP multiply and subtract */ +uint64_t HELPER(mseb)(CPUS390XState *env, uint64_t f1, + uint64_t f2, uint64_t f3) +{ + float32 ret = float32_muladd(f2, f3, f1, float_muladd_negate_c, + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* 64-bit FP multiply and subtract */ +uint64_t HELPER(msdb)(CPUS390XState *env, uint64_t f1, + uint64_t f2, uint64_t f3) +{ + float64 ret = float64_muladd(f2, f3, f1, float_muladd_negate_c, + &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* The rightmost bit has the number 11. */ +static inline uint16_t dcmask(int bit, bool neg) +{ + return 1 << (11 - bit - neg); +} + +#define DEF_FLOAT_DCMASK(_TYPE) \ +uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1) \ +{ \ + const bool neg = _TYPE##_is_neg(f1); \ + \ + /* Sorted by most common cases - only one class is possible */ \ + if (_TYPE##_is_normal(f1)) { \ + return dcmask(2, neg); \ + } else if (_TYPE##_is_zero(f1)) { \ + return dcmask(0, neg); \ + } else if (_TYPE##_is_denormal(f1)) { \ + return dcmask(4, neg); \ + } else if (_TYPE##_is_infinity(f1)) { \ + return dcmask(6, neg); \ + } else if (_TYPE##_is_quiet_nan(f1, &env->fpu_status)) { \ + return dcmask(8, neg); \ + } \ + /* signaling nan, as last remaining case */ \ + return dcmask(10, neg); \ +} +DEF_FLOAT_DCMASK(float32) +DEF_FLOAT_DCMASK(float64) +DEF_FLOAT_DCMASK(float128) + +/* test data class 32-bit */ +uint32_t HELPER(tceb)(CPUS390XState *env, uint64_t f1, uint64_t m2) +{ + return (m2 & float32_dcmask(env, f1)) != 0; +} + +/* test data class 64-bit */ +uint32_t HELPER(tcdb)(CPUS390XState *env, uint64_t v1, uint64_t m2) +{ + return (m2 & float64_dcmask(env, v1)) != 0; +} + +/* test data class 128-bit */ +uint32_t HELPER(tcxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t m2) +{ + return (m2 & float128_dcmask(env, make_float128(ah, al))) != 0; +} + +/* square root 32-bit */ +uint64_t HELPER(sqeb)(CPUS390XState *env, uint64_t f2) +{ + float32 ret = float32_sqrt(f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* square root 64-bit */ +uint64_t HELPER(sqdb)(CPUS390XState *env, uint64_t f2) +{ + float64 ret = float64_sqrt(f2, &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return ret; +} + +/* square root 128-bit */ +uint64_t HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al) +{ + float128 ret = float128_sqrt(make_float128(ah, al), &env->fpu_status); + handle_exceptions(env, false, GETPC()); + return RET128(ret); +} + +static const int fpc_to_rnd[8] = { + float_round_nearest_even, + float_round_to_zero, + float_round_up, + float_round_down, + -1, + -1, + -1, + float_round_to_odd, +}; + +/* set fpc */ +void HELPER(sfpc)(CPUS390XState *env, uint64_t fpc) +{ + if (fpc_to_rnd[fpc & 0x7] == -1 || fpc & 0x03030088u || + (!s390_has_feat(S390_FEAT_FLOATING_POINT_EXT) && fpc & 0x4)) { + 
+
+/* test data class 32-bit */
+uint32_t HELPER(tceb)(CPUS390XState *env, uint64_t f1, uint64_t m2)
+{
+    return (m2 & float32_dcmask(env, f1)) != 0;
+}
+
+/* test data class 64-bit */
+uint32_t HELPER(tcdb)(CPUS390XState *env, uint64_t v1, uint64_t m2)
+{
+    return (m2 & float64_dcmask(env, v1)) != 0;
+}
+
+/* test data class 128-bit */
+uint32_t HELPER(tcxb)(CPUS390XState *env, uint64_t ah, uint64_t al, uint64_t m2)
+{
+    return (m2 & float128_dcmask(env, make_float128(ah, al))) != 0;
+}
+
+/* square root 32-bit */
+uint64_t HELPER(sqeb)(CPUS390XState *env, uint64_t f2)
+{
+    float32 ret = float32_sqrt(f2, &env->fpu_status);
+    handle_exceptions(env, false, GETPC());
+    return ret;
+}
+
+/* square root 64-bit */
+uint64_t HELPER(sqdb)(CPUS390XState *env, uint64_t f2)
+{
+    float64 ret = float64_sqrt(f2, &env->fpu_status);
+    handle_exceptions(env, false, GETPC());
+    return ret;
+}
+
+/* square root 128-bit */
+uint64_t HELPER(sqxb)(CPUS390XState *env, uint64_t ah, uint64_t al)
+{
+    float128 ret = float128_sqrt(make_float128(ah, al), &env->fpu_status);
+    handle_exceptions(env, false, GETPC());
+    return RET128(ret);
+}
+
+static const int fpc_to_rnd[8] = {
+    float_round_nearest_even,
+    float_round_to_zero,
+    float_round_up,
+    float_round_down,
+    -1,
+    -1,
+    -1,
+    float_round_to_odd,
+};
+
+/* set fpc */
+void HELPER(sfpc)(CPUS390XState *env, uint64_t fpc)
+{
+    if (fpc_to_rnd[fpc & 0x7] == -1 || fpc & 0x03030088u ||
+        (!s390_has_feat(S390_FEAT_FLOATING_POINT_EXT) && fpc & 0x4)) {
+        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
+    }
+
+    /* Install everything in the main FPC. */
+    env->fpc = fpc;
+
+    /* Install the rounding mode in the shadow fpu_status. */
+    set_float_rounding_mode(fpc_to_rnd[fpc & 0x7], &env->fpu_status);
+}
+
+/* set fpc and signal */
+void HELPER(sfas)(CPUS390XState *env, uint64_t fpc)
+{
+    uint32_t signalling = env->fpc;
+    uint32_t s390_exc;
+
+    if (fpc_to_rnd[fpc & 0x7] == -1 || fpc & 0x03030088u ||
+        (!s390_has_feat(S390_FEAT_FLOATING_POINT_EXT) && fpc & 0x4)) {
+        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
+    }
+
+    /*
+     * FPC is set to the FPC operand with a bitwise OR of the signalling
+     * flags.
+     */
+    env->fpc = fpc | (signalling & 0x00ff0000);
+    set_float_rounding_mode(fpc_to_rnd[fpc & 0x7], &env->fpu_status);
+
+    /*
+     * If any signaling flag is enabled in the new FPC mask, a
+     * simulated IEEE exception occurs.
+     */
+    s390_exc = (signalling >> 16) & (fpc >> 24);
+    if (s390_exc) {
+        if (s390_exc & S390_IEEE_MASK_INVALID) {
+            s390_exc = S390_IEEE_MASK_INVALID;
+        } else if (s390_exc & S390_IEEE_MASK_DIVBYZERO) {
+            s390_exc = S390_IEEE_MASK_DIVBYZERO;
+        } else if (s390_exc & S390_IEEE_MASK_OVERFLOW) {
+            s390_exc &= (S390_IEEE_MASK_OVERFLOW | S390_IEEE_MASK_INEXACT);
+        } else if (s390_exc & S390_IEEE_MASK_UNDERFLOW) {
+            s390_exc &= (S390_IEEE_MASK_UNDERFLOW | S390_IEEE_MASK_INEXACT);
+        } else if (s390_exc & S390_IEEE_MASK_INEXACT) {
+            s390_exc = S390_IEEE_MASK_INEXACT;
+        } else if (s390_exc & S390_IEEE_MASK_QUANTUM) {
+            s390_exc = S390_IEEE_MASK_QUANTUM;
+        }
+        tcg_s390_data_exception(env, s390_exc | 3, GETPC());
+    }
+}
+
+/* set bfp rounding mode */
+void HELPER(srnm)(CPUS390XState *env, uint64_t rnd)
+{
+    if (rnd > 0x7 || fpc_to_rnd[rnd & 0x7] == -1) {
+        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
+    }
+
+    env->fpc = deposit32(env->fpc, 0, 3, rnd);
+    set_float_rounding_mode(fpc_to_rnd[rnd & 0x7], &env->fpu_status);
+}
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
new file mode 100644
index 0000000000..3e5594210c
--- /dev/null
+++ b/target/s390x/tcg/insn-data.def
@@ -0,0 +1,1398 @@
+/*
+ * Arguments to the opcode prototypes
+ *
+ * C(OPC, NAME, FMT, FAC, I1, I2, P, W, OP, CC)
+ * D(OPC, NAME, FMT, FAC, I1, I2, P, W, OP, CC, DATA)
+ * E(OPC, NAME, FMT, FAC, I1, I2, P, W, OP, CC, DATA, FLAGS)
+ * F(OPC, NAME, FMT, FAC, I1, I2, P, W, OP, CC, FLAGS)
+ *
+ * OPC = (op << 8) | op2 where op is the major, op2 the minor opcode
+ * NAME = name of the opcode, used internally
+ * FMT = format of the opcode (defined in insn-format.def)
+ * FAC = facility the opcode is available in (defined in DisasFacility)
+ * I1 = func in1_xx fills o->in1
+ * I2 = func in2_xx fills o->in2
+ * P = func prep_xx initializes o->*out*
+ * W = func wout_xx writes o->*out* somewhere
+ * OP = func op_xx does the bulk of the operation
+ * CC = func cout_xx defines how cc should get set
+ * DATA = immediate argument to op_xx function
+ * FLAGS = categorize the type of instruction (e.g. for advanced checks)
+ *
+ * The helpers get called in order: I1, I2, P, OP, W, CC
+ */
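+
+/*
+ * Worked example (an assumed reading of one entry against the key
+ * above): for "C(0x1a00, AR, RR_a, Z, r1, r2, new, r1_32, add, adds32)"
+ * the translator fills o->in1 from r1 and o->in2 from r2, allocates a
+ * fresh temporary for the output ("new"), performs the op_add
+ * operation, writes the low 32 bits of the result back to r1 ("r1_32"),
+ * and finally computes the cc as for a signed 32-bit addition
+ * ("adds32").
+ */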
+
+/* ADD */
+    C(0x1a00, AR, RR_a, Z, r1, r2, new, r1_32, add, adds32)
+    C(0xb9f8, ARK, RRF_a, DO, r2, r3, new, r1_32, add, adds32)
+    C(0x5a00, A, RX_a, Z, r1, m2_32s, new, r1_32, add, adds32)
+    C(0xe35a, AY, RXY_a, LD, r1, m2_32s, new, r1_32, add, adds32)
+    C(0xb908, AGR, RRE, Z, r1, r2, r1, 0, add, adds64)
+    C(0xb918, AGFR, RRE, Z, r1, r2_32s, r1, 0, add, adds64)
+    C(0xb9e8, AGRK, RRF_a, DO, r2, r3, r1, 0, add, adds64)
+    C(0xe308, AG, RXY_a, Z, r1, m2_64, r1, 0, add, adds64)
+    C(0xe318, AGF, RXY_a, Z, r1, m2_32s, r1, 0, add, adds64)
+    F(0xb30a, AEBR, RRE, Z, e1, e2, new, e1, aeb, f32, IF_BFP)
+    F(0xb31a, ADBR, RRE, Z, f1, f2, new, f1, adb, f64, IF_BFP)
+    F(0xb34a, AXBR, RRE, Z, x2h, x2l, x1, x1, axb, f128, IF_BFP)
+    F(0xed0a, AEB, RXE, Z, e1, m2_32u, new, e1, aeb, f32, IF_BFP)
+    F(0xed1a, ADB, RXE, Z, f1, m2_64, new, f1, adb, f64, IF_BFP)
+/* ADD HIGH */
+    C(0xb9c8, AHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, add, adds32)
+    C(0xb9d8, AHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, add, adds32)
+/* ADD IMMEDIATE */
+    C(0xc209, AFI, RIL_a, EI, r1, i2, new, r1_32, add, adds32)
+    D(0xeb6a, ASI, SIY, GIE, la1, i2, new, 0, asi, adds32, MO_TESL)
+    C(0xecd8, AHIK, RIE_d, DO, r3, i2, new, r1_32, add, adds32)
+    C(0xc208, AGFI, RIL_a, EI, r1, i2, r1, 0, add, adds64)
+    D(0xeb7a, AGSI, SIY, GIE, la1, i2, new, 0, asi, adds64, MO_TEQ)
+    C(0xecd9, AGHIK, RIE_d, DO, r3, i2, r1, 0, add, adds64)
+/* ADD IMMEDIATE HIGH */
+    C(0xcc08, AIH, RIL_a, HW, r1_sr32, i2, new, r1_32h, add, adds32)
+/* ADD HALFWORD */
+    C(0x4a00, AH, RX_a, Z, r1, m2_16s, new, r1_32, add, adds32)
+    C(0xe37a, AHY, RXY_a, LD, r1, m2_16s, new, r1_32, add, adds32)
+    C(0xe338, AGH, RXY_a, MIE2,r1, m2_16s, r1, 0, add, adds64)
+/* ADD HALFWORD IMMEDIATE */
+    C(0xa70a, AHI, RI_a, Z, r1, i2, new, r1_32, add, adds32)
+    C(0xa70b, AGHI, RI_a, Z, r1, i2, r1, 0, add, adds64)
+
+/* ADD LOGICAL */
+    C(0x1e00, ALR, RR_a, Z, r1_32u, r2_32u, new, r1_32, add, addu32)
+    C(0xb9fa, ALRK, RRF_a, DO, r2_32u, r3_32u, new, r1_32, add, addu32)
+    C(0x5e00, AL, RX_a, Z, r1_32u, m2_32u, new, r1_32, add, addu32)
+    C(0xe35e, ALY, RXY_a, LD, r1_32u, m2_32u, new, r1_32, add, addu32)
+    C(0xb90a, ALGR, RRE, Z, r1, r2, r1, 0, addu64, addu64)
+    C(0xb91a, ALGFR, RRE, Z, r1, r2_32u, r1, 0, addu64, addu64)
+    C(0xb9ea, ALGRK, RRF_a, DO, r2, r3, r1, 0, addu64, addu64)
+    C(0xe30a, ALG, RXY_a, Z, r1, m2_64, r1, 0, addu64, addu64)
+    C(0xe31a, ALGF, RXY_a, Z, r1, m2_32u, r1, 0, addu64, addu64)
+/* ADD LOGICAL HIGH */
+    C(0xb9ca, ALHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, add, addu32)
+    C(0xb9da, ALHHLR, RRF_a, HW, r2_sr32, r3_32u, new, r1_32h, add, addu32)
+/* ADD LOGICAL IMMEDIATE */
+    C(0xc20b, ALFI, RIL_a, EI, r1_32u, i2_32u, new, r1_32, add, addu32)
+    C(0xc20a, ALGFI, RIL_a, EI, r1, i2_32u, r1, 0, addu64, addu64)
+/* ADD LOGICAL WITH SIGNED IMMEDIATE */
+    D(0xeb6e, ALSI, SIY, GIE, la1, i2_32u, new, 0, asi, addu32, MO_TEUL)
+    C(0xecda, ALHSIK, RIE_d, DO, r3_32u, i2_32u, new, r1_32, add, addu32)
+    D(0xeb7e, ALGSI, SIY, GIE, la1, i2, new, 0, asiu64, addu64, MO_TEQ)
+    C(0xecdb, ALGHSIK, RIE_d, DO, r3, i2, r1, 0, addu64, addu64)
+/* ADD LOGICAL WITH SIGNED IMMEDIATE HIGH */
+    C(0xcc0a, ALSIH, RIL_a, HW, r1_sr32, i2_32u, new, r1_32h, add, addu32)
+    C(0xcc0b, ALSIHN, RIL_a, HW, r1_sr32, i2_32u, new, r1_32h, add, 0)
+/* ADD LOGICAL WITH CARRY */
+    C(0xb998, ALCR, RRE, Z, r1_32u, r2_32u, new, r1_32, addc32, addu32)
+    C(0xb988, ALCGR, RRE, Z, r1, r2, r1, 0, addc64, addu64)
+    C(0xe398, ALC, RXY_a, Z, r1_32u, m2_32u, new, r1_32, addc32, addu32)
+    C(0xe388, ALCG, RXY_a, Z, r1, m2_64, r1, 0, addc64, addu64)
+
+/* AND */
+    C(0x1400, NR, RR_a, Z, r1, r2, new, r1_32, and, nz32)
+    C(0xb9f4, NRK, RRF_a, DO, r2, r3, new, r1_32, and, nz32)
+    C(0x5400, N, RX_a, Z, r1, m2_32s, new, r1_32, and, nz32)
+    C(0xe354, NY, RXY_a, LD, r1, m2_32s, new, r1_32, and, nz32)
+    C(0xb980, NGR, RRE, Z, r1, r2, r1, 0, and, nz64)
+    C(0xb9e4, NGRK, RRF_a, DO, r2, r3, r1, 0, and, nz64)
+    C(0xe380, NG, RXY_a, Z, r1, m2_64, r1, 0, and, nz64)
+    C(0xd400, NC, SS_a, Z, la1, a2, 0, 0, nc, 0)
+/* AND IMMEDIATE */
+    D(0xc00a, NIHF, RIL_a, EI, r1_o, i2_32u, r1, 0, andi, 0, 0x2020)
+    D(0xc00b, NILF, RIL_a, EI, r1_o, i2_32u, r1, 0, andi, 0, 0x2000)
+    D(0xa504, NIHH, RI_a, Z, r1_o, i2_16u, r1, 0, andi, 0, 0x1030)
+    D(0xa505, NIHL, RI_a, Z, r1_o, i2_16u, r1, 0, andi, 0, 0x1020)
+    D(0xa506, NILH, RI_a, Z, r1_o, i2_16u, r1, 0, andi, 0, 0x1010)
+    D(0xa507, NILL, RI_a, Z, r1_o, i2_16u, r1, 0, andi, 0, 0x1000)
+    D(0x9400, NI, SI, Z, la1, i2_8u, new, 0, ni, nz64, MO_UB)
+    D(0xeb54, NIY, SIY, LD, la1, i2_8u, new, 0, ni, nz64, MO_UB)
+
+/* BRANCH AND LINK */
+    C(0x0500, BALR, RR_a, Z, 0, r2_nz, r1, 0, bal, 0)
+    C(0x4500, BAL, RX_a, Z, 0, a2, r1, 0, bal, 0)
+/* BRANCH AND SAVE */
+    C(0x0d00, BASR, RR_a, Z, 0, r2_nz, r1, 0, bas, 0)
+    C(0x4d00, BAS, RX_a, Z, 0, a2, r1, 0, bas, 0)
+/* BRANCH RELATIVE AND SAVE */
+    C(0xa705, BRAS, RI_b, Z, 0, 0, r1, 0, basi, 0)
+    C(0xc005, BRASL, RIL_b, Z, 0, 0, r1, 0, basi, 0)
+/* BRANCH INDIRECT ON CONDITION */
+    C(0xe347, BIC, RXY_b, MIE2,0, m2_64w, 0, 0, bc, 0)
+/* BRANCH ON CONDITION */
+    C(0x0700, BCR, RR_b, Z, 0, r2_nz, 0, 0, bc, 0)
+    C(0x4700, BC, RX_b, Z, 0, a2, 0, 0, bc, 0)
+/* BRANCH RELATIVE ON CONDITION */
+    C(0xa704, BRC, RI_c, Z, 0, 0, 0, 0, bc, 0)
+    C(0xc004, BRCL, RIL_c, Z, 0, 0, 0, 0, bc, 0)
+/* BRANCH ON COUNT */
+    C(0x0600, BCTR, RR_a, Z, 0, r2_nz, 0, 0, bct32, 0)
+    C(0xb946, BCTGR, RRE, Z, 0, r2_nz, 0, 0, bct64, 0)
+    C(0x4600, BCT, RX_a, Z, 0, a2, 0, 0, bct32, 0)
+    C(0xe346, BCTG, RXY_a, Z, 0, a2, 0, 0, bct64, 0)
+/* BRANCH RELATIVE ON COUNT */
+    C(0xa706, BRCT, RI_b, Z, 0, 0, 0, 0, bct32, 0)
+    C(0xa707, BRCTG, RI_b, Z, 0, 0, 0, 0, bct64, 0)
+/* BRANCH RELATIVE ON COUNT HIGH */
+    C(0xcc06, BRCTH, RIL_b, HW, 0, 0, 0, 0, bcth, 0)
+/* BRANCH ON INDEX */
+    D(0x8600, BXH, RS_a, Z, 0, a2, 0, 0, bx32, 0, 0)
+    D(0x8700, BXLE, RS_a, Z, 0, a2, 0, 0, bx32, 0, 1)
+    D(0xeb44, BXHG, RSY_a, Z, 0, a2, 0, 0, bx64, 0, 0)
+    D(0xeb45, BXLEG, RSY_a, Z, 0, a2, 0, 0, bx64, 0, 1)
+/* BRANCH RELATIVE ON INDEX */
+    D(0x8400, BRXH, RSI, Z, 0, 0, 0, 0, bx32, 0, 0)
+    D(0x8500, BRXLE, RSI, Z, 0, 0, 0, 0, bx32, 0, 1)
+    D(0xec44, BRXHG, RIE_e, Z, 0, 0, 0, 0, bx64, 0, 0)
+    D(0xec45, BRXHLE, RIE_e, Z, 0, 0, 0, 0, bx64, 0, 1)
+/* BRANCH PREDICTION PRELOAD */
+    /* ??? Format is SMI, but implemented as NOP, so we need no fields. */
+    C(0xc700, BPP, E, EH, 0, 0, 0, 0, 0, 0)
+/* BRANCH PREDICTION RELATIVE PRELOAD */
+    /* ??? Format is MII, but implemented as NOP, so we need no fields. */
+    C(0xc500, BPRP, E, EH, 0, 0, 0, 0, 0, 0)
+/* NEXT INSTRUCTION ACCESS INTENT */
+    /* ??? Format is IE, but implemented as NOP, so we need no fields. */
+    C(0xb2fa, NIAI, E, EH, 0, 0, 0, 0, 0, 0)
+
+/* CHECKSUM */
+    C(0xb241, CKSM, RRE, Z, r1_o, ra2, new, r1_32, cksm, 0)
+
+/* COPY SIGN */
+    F(0xb372, CPSDR, RRF_b, FPSSH, f3, f2, new, f1, cps, 0, IF_AFP1 | IF_AFP2 | IF_AFP3)
+
+/* COMPARE */
+    C(0x1900, CR, RR_a, Z, r1_o, r2_o, 0, 0, 0, cmps32)
+    C(0x5900, C, RX_a, Z, r1_o, m2_32s, 0, 0, 0, cmps32)
+    C(0xe359, CY, RXY_a, LD, r1_o, m2_32s, 0, 0, 0, cmps32)
+    C(0xb920, CGR, RRE, Z, r1_o, r2_o, 0, 0, 0, cmps64)
+    C(0xb930, CGFR, RRE, Z, r1_o, r2_32s, 0, 0, 0, cmps64)
+    C(0xe320, CG, RXY_a, Z, r1_o, m2_64, 0, 0, 0, cmps64)
+    C(0xe330, CGF, RXY_a, Z, r1_o, m2_32s, 0, 0, 0, cmps64)
+    F(0xb309, CEBR, RRE, Z, e1, e2, 0, 0, ceb, 0, IF_BFP)
+    F(0xb319, CDBR, RRE, Z, f1, f2, 0, 0, cdb, 0, IF_BFP)
+    F(0xb349, CXBR, RRE, Z, x2h, x2l, x1, 0, cxb, 0, IF_BFP)
+    F(0xed09, CEB, RXE, Z, e1, m2_32u, 0, 0, ceb, 0, IF_BFP)
+    F(0xed19, CDB, RXE, Z, f1, m2_64, 0, 0, cdb, 0, IF_BFP)
+/* COMPARE AND SIGNAL */
+    F(0xb308, KEBR, RRE, Z, e1, e2, 0, 0, keb, 0, IF_BFP)
+    F(0xb318, KDBR, RRE, Z, f1, f2, 0, 0, kdb, 0, IF_BFP)
+    F(0xb348, KXBR, RRE, Z, x2h, x2l, x1, 0, kxb, 0, IF_BFP)
+    F(0xed08, KEB, RXE, Z, e1, m2_32u, 0, 0, keb, 0, IF_BFP)
+    F(0xed18, KDB, RXE, Z, f1, m2_64, 0, 0, kdb, 0, IF_BFP)
+/* COMPARE IMMEDIATE */
+    C(0xc20d, CFI, RIL_a, EI, r1, i2, 0, 0, 0, cmps32)
+    C(0xc20c, CGFI, RIL_a, EI, r1, i2, 0, 0, 0, cmps64)
+/* COMPARE RELATIVE LONG */
+    C(0xc60d, CRL, RIL_b, GIE, r1, mri2_32s, 0, 0, 0, cmps32)
+    C(0xc608, CGRL, RIL_b, GIE, r1, mri2_64, 0, 0, 0, cmps64)
+    C(0xc60c, CGFRL, RIL_b, GIE, r1, mri2_32s, 0, 0, 0, cmps64)
+/* COMPARE HALFWORD */
+    C(0x4900, CH, RX_a, Z, r1_o, m2_16s, 0, 0, 0, cmps32)
+    C(0xe379, CHY, RXY_a, LD, r1_o, m2_16s, 0, 0, 0, cmps32)
+    C(0xe334, CGH, RXY_a, GIE, r1_o, m2_16s, 0, 0, 0, cmps64)
+/* COMPARE HALFWORD IMMEDIATE */
+    C(0xa70e, CHI, RI_a, Z, r1_o, i2, 0, 0, 0, cmps32)
+    C(0xa70f, CGHI, RI_a, Z, r1_o, i2, 0, 0, 0, cmps64)
+    C(0xe554, CHHSI, SIL, GIE, m1_16s, i2, 0, 0, 0, cmps64)
+    C(0xe55c, CHSI, SIL, GIE, m1_32s, i2, 0, 0, 0, cmps64)
+    C(0xe558, CGHSI, SIL, GIE, m1_64, i2, 0, 0, 0, cmps64)
+/* COMPARE HALFWORD RELATIVE LONG */
+    C(0xc605, CHRL, RIL_b, GIE, r1_o, mri2_32s, 0, 0, 0, cmps32)
+    C(0xc604, CGHRL, RIL_b, GIE, r1_o, mri2_64, 0, 0, 0, cmps64)
+/* COMPARE HIGH */
+    C(0xb9cd, CHHR, RRE, HW, r1_sr32, r2_sr32, 0, 0, 0, cmps32)
+    C(0xb9dd, CHLR, RRE, HW, r1_sr32, r2_o, 0, 0, 0, cmps32)
+    C(0xe3cd, CHF, RXY_a, HW, r1_sr32, m2_32s, 0, 0, 0, cmps32)
+/* COMPARE IMMEDIATE HIGH */
+    C(0xcc0d, CIH, RIL_a, HW, r1_sr32, i2, 0, 0, 0, cmps32)
+
+/* COMPARE LOGICAL */
+    C(0x1500, CLR, RR_a, Z, r1, r2, 0, 0, 0, cmpu32)
+    C(0x5500, CL, RX_a, Z, r1, m2_32s, 0, 0, 0, cmpu32)
+    C(0xe355, CLY, RXY_a, LD, r1, m2_32s, 0, 0, 0, cmpu32)
+    C(0xb921, CLGR, RRE, Z, r1, r2, 0, 0, 0, cmpu64)
+    C(0xb931, CLGFR, RRE, Z, r1, r2_32u, 0, 0, 0, cmpu64)
+    C(0xe321, CLG, RXY_a, Z, r1, m2_64, 0, 0, 0, cmpu64)
+    C(0xe331, CLGF, RXY_a, Z, r1, m2_32u, 0, 0, 0, cmpu64)
+    C(0xd500, CLC, SS_a, Z, la1, a2, 0, 0, clc, 0)
+/* COMPARE LOGICAL HIGH */
+    C(0xb9cf, CLHHR, RRE, HW, r1_sr32, r2_sr32, 0, 0, 0, cmpu32)
+    C(0xb9df, CLHLR, RRE, HW, r1_sr32, r2_o, 0, 0, 0, cmpu32)
+    C(0xe3cf, CLHF, RXY_a, HW, r1_sr32, m2_32s, 0, 0, 0, cmpu32)
+/* COMPARE LOGICAL IMMEDIATE */
+    C(0xc20f, CLFI, RIL_a, EI, r1, i2, 0, 0, 0, cmpu32)
+    C(0xc20e, CLGFI, RIL_a, EI, r1, i2_32u, 0, 0, 0, cmpu64)
+    C(0x9500, CLI, SI, Z, m1_8u, i2_8u, 0, 0, 0, cmpu64)
+    C(0xeb55, CLIY, SIY, LD, m1_8u, i2_8u, 0, 0, 0, cmpu64)
+    C(0xe555, CLHHSI, SIL, GIE, m1_16u, i2_16u, 0, 0, 0, cmpu64)
+    C(0xe55d, CLFHSI, SIL, GIE, m1_32u, i2_16u, 0, 0, 0, cmpu64)
+    C(0xe559, CLGHSI, SIL, GIE, m1_64, i2_16u, 0, 0, 0, cmpu64)
+/* COMPARE LOGICAL IMMEDIATE HIGH */
+    C(0xcc0f, CLIH, RIL_a, HW, r1_sr32, i2, 0, 0, 0, cmpu32)
+/* COMPARE LOGICAL RELATIVE LONG */
+    C(0xc60f, CLRL, RIL_b, GIE, r1_o, mri2_32u, 0, 0, 0, cmpu32)
+    C(0xc60a, CLGRL, RIL_b, GIE, r1_o, mri2_64, 0, 0, 0, cmpu64)
+    C(0xc60e, CLGFRL, RIL_b, GIE, r1_o, mri2_32u, 0, 0, 0, cmpu64)
+    C(0xc607, CLHRL, RIL_b, GIE, r1_o, mri2_16u, 0, 0, 0, cmpu32)
+    C(0xc606, CLGHRL, RIL_b, GIE, r1_o, mri2_16u, 0, 0, 0, cmpu64)
+/* COMPARE LOGICAL LONG */
+    C(0x0f00, CLCL, RR_a, Z, 0, 0, 0, 0, clcl, 0)
+/* COMPARE LOGICAL LONG EXTENDED */
+    C(0xa900, CLCLE, RS_a, Z, 0, a2, 0, 0, clcle, 0)
+/* COMPARE LOGICAL LONG UNICODE */
+    C(0xeb8f, CLCLU, RSY_a, E2, 0, a2, 0, 0, clclu, 0)
+/* COMPARE LOGICAL CHARACTERS UNDER MASK */
+    C(0xbd00, CLM, RS_b, Z, r1_o, a2, 0, 0, clm, 0)
+    C(0xeb21, CLMY, RSY_b, LD, r1_o, a2, 0, 0, clm, 0)
+    C(0xeb20, CLMH, RSY_b, Z, r1_sr32, a2, 0, 0, clm, 0)
+/* COMPARE LOGICAL STRING */
+    C(0xb25d, CLST, RRE, Z, r1_o, r2_o, 0, 0, clst, 0)
+
+/* COMPARE AND BRANCH */
+    D(0xecf6, CRB, RRS, GIE, r1_32s, r2_32s, 0, 0, cj, 0, 0)
+    D(0xece4, CGRB, RRS, GIE, r1_o, r2_o, 0, 0, cj, 0, 0)
+    D(0xec76, CRJ, RIE_b, GIE, r1_32s, r2_32s, 0, 0, cj, 0, 0)
+    D(0xec64, CGRJ, RIE_b, GIE, r1_o, r2_o, 0, 0, cj, 0, 0)
+    D(0xecfe, CIB, RIS, GIE, r1_32s, i2, 0, 0, cj, 0, 0)
+    D(0xecfc, CGIB, RIS, GIE, r1_o, i2, 0, 0, cj, 0, 0)
+    D(0xec7e, CIJ, RIE_c, GIE, r1_32s, i2, 0, 0, cj, 0, 0)
+    D(0xec7c, CGIJ, RIE_c, GIE, r1_o, i2, 0, 0, cj, 0, 0)
+/* COMPARE LOGICAL AND BRANCH */
+    D(0xecf7, CLRB, RRS, GIE, r1_32u, r2_32u, 0, 0, cj, 0, 1)
+    D(0xece5, CLGRB, RRS, GIE, r1_o, r2_o, 0, 0, cj, 0, 1)
+    D(0xec77, CLRJ, RIE_b, GIE, r1_32u, r2_32u, 0, 0, cj, 0, 1)
+    D(0xec65, CLGRJ, RIE_b, GIE, r1_o, r2_o, 0, 0, cj, 0, 1)
+    D(0xecff, CLIB, RIS, GIE, r1_32u, i2_8u, 0, 0, cj, 0, 1)
+    D(0xecfd, CLGIB, RIS, GIE, r1_o, i2_8u, 0, 0, cj, 0, 1)
+    D(0xec7f, CLIJ, RIE_c, GIE, r1_32u, i2_8u, 0, 0, cj, 0, 1)
+    D(0xec7d, CLGIJ, RIE_c, GIE, r1_o, i2_8u, 0, 0, cj, 0, 1)
+
+/* COMPARE AND SWAP */
+    D(0xba00, CS, RS_a, Z, r3_32u, r1_32u, new, r1_32, cs, 0, MO_TEUL)
+    D(0xeb14, CSY, RSY_a, LD, r3_32u, r1_32u, new, r1_32, cs, 0, MO_TEUL)
+    D(0xeb30, CSG, RSY_a, Z, r3_o, r1_o, new, r1, cs, 0, MO_TEQ)
+/* COMPARE DOUBLE AND SWAP */
+    D(0xbb00, CDS, RS_a, Z, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ)
+    D(0xeb31, CDSY, RSY_a, LD, r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEQ)
+    C(0xeb3e, CDSG, RSY_a, Z, 0, 0, 0, 0, cdsg, 0)
+/* COMPARE AND SWAP AND STORE */
+    C(0xc802, CSST, SSF, CASS, la1, a2, 0, 0, csst, 0)
+
+/* COMPARE AND TRAP */
+    D(0xb972, CRT, RRF_c, GIE, r1_32s, r2_32s, 0, 0, ct, 0, 0)
+    D(0xb960, CGRT, RRF_c, GIE, r1_o, r2_o, 0, 0, ct, 0, 0)
+    D(0xec72, CIT, RIE_a, GIE, r1_32s, i2, 0, 0, ct, 0, 0)
+    D(0xec70, CGIT, RIE_a, GIE, r1_o, i2, 0, 0, ct, 0, 0)
+/* COMPARE LOGICAL AND TRAP */
+    D(0xb973, CLRT, RRF_c, GIE, r1_32u, r2_32u, 0, 0, ct, 0, 1)
+    D(0xb961, CLGRT, RRF_c, GIE, r1_o, r2_o, 0, 0, ct, 0, 1)
+    D(0xeb23, CLT, RSY_b, MIE, r1_32u, m2_32u, 0, 0, ct, 0, 1)
+    D(0xeb2b, CLGT, RSY_b, MIE, r1_o, m2_64, 0, 0, ct, 0, 1)
+    D(0xec73, CLFIT, RIE_a, GIE, r1_32u, i2_32u, 0, 0, ct, 0, 1)
+    D(0xec71, CLGIT, RIE_a, GIE, r1_o, i2_32u, 0, 0, ct, 0, 1)
+
+/* CONVERT TO DECIMAL */
+    C(0x4e00, CVD, RX_a, Z, r1_o, a2, 0, 0, cvd, 0)
+    C(0xe326, CVDY, RXY_a, LD, r1_o, a2, 0, 0, cvd, 0)
+/* CONVERT TO FIXED */
+    F(0xb398, CFEBR, RRF_e, Z, 0, e2, new, r1_32, cfeb, 0, IF_BFP)
+    F(0xb399, CFDBR, RRF_e, Z, 0, f2, new, r1_32, cfdb, 0, IF_BFP)
+    F(0xb39a, CFXBR, RRF_e, Z, x2h, x2l, new, r1_32, cfxb, 0, IF_BFP)
+    F(0xb3a8, CGEBR, RRF_e, Z, 0, e2, r1, 0, cgeb, 0, IF_BFP)
+    F(0xb3a9, CGDBR, RRF_e, Z, 0, f2, r1, 0, cgdb, 0, IF_BFP)
+    F(0xb3aa, CGXBR, RRF_e, Z, x2h, x2l, r1, 0, cgxb, 0, IF_BFP)
+/* CONVERT FROM FIXED */
+    F(0xb394, CEFBR, RRF_e, Z, 0, r2_32s, new, e1, cegb, 0, IF_BFP)
+    F(0xb395, CDFBR, RRF_e, Z, 0, r2_32s, new, f1, cdgb, 0, IF_BFP)
+    F(0xb396, CXFBR, RRF_e, Z, 0, r2_32s, new_P, x1, cxgb, 0, IF_BFP)
+    F(0xb3a4, CEGBR, RRF_e, Z, 0, r2_o, new, e1, cegb, 0, IF_BFP)
+    F(0xb3a5, CDGBR, RRF_e, Z, 0, r2_o, new, f1, cdgb, 0, IF_BFP)
+    F(0xb3a6, CXGBR, RRF_e, Z, 0, r2_o, new_P, x1, cxgb, 0, IF_BFP)
+/* CONVERT TO LOGICAL */
+    F(0xb39c, CLFEBR, RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0, IF_BFP)
+    F(0xb39d, CLFDBR, RRF_e, FPE, 0, f2, new, r1_32, clfdb, 0, IF_BFP)
+    F(0xb39e, CLFXBR, RRF_e, FPE, x2h, x2l, new, r1_32, clfxb, 0, IF_BFP)
+    F(0xb3ac, CLGEBR, RRF_e, FPE, 0, e2, r1, 0, clgeb, 0, IF_BFP)
+    F(0xb3ad, CLGDBR, RRF_e, FPE, 0, f2, r1, 0, clgdb, 0, IF_BFP)
+    F(0xb3ae, CLGXBR, RRF_e, FPE, x2h, x2l, r1, 0, clgxb, 0, IF_BFP)
+/* CONVERT FROM LOGICAL */
+    F(0xb390, CELFBR, RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0, IF_BFP)
+    F(0xb391, CDLFBR, RRF_e, FPE, 0, r2_32u, new, f1, cdlgb, 0, IF_BFP)
+    F(0xb392, CXLFBR, RRF_e, FPE, 0, r2_32u, new_P, x1, cxlgb, 0, IF_BFP)
+    F(0xb3a0, CELGBR, RRF_e, FPE, 0, r2_o, new, e1, celgb, 0, IF_BFP)
+    F(0xb3a1, CDLGBR, RRF_e, FPE, 0, r2_o, new, f1, cdlgb, 0, IF_BFP)
+    F(0xb3a2, CXLGBR, RRF_e, FPE, 0, r2_o, new_P, x1, cxlgb, 0, IF_BFP)
+
+/* CONVERT UTF-8 TO UTF-16 */
+    D(0xb2a7, CU12, RRF_c, Z, 0, 0, 0, 0, cuXX, 0, 12)
+/* CONVERT UTF-8 TO UTF-32 */
+    D(0xb9b0, CU14, RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 14)
+/* CONVERT UTF-16 TO UTF-8 */
+    D(0xb2a6, CU21, RRF_c, Z, 0, 0, 0, 0, cuXX, 0, 21)
+/* CONVERT UTF-16 TO UTF-32 */
+    D(0xb9b1, CU24, RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 24)
+/* CONVERT UTF-32 TO UTF-8 */
+    D(0xb9b2, CU41, RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 41)
+/* CONVERT UTF-32 TO UTF-16 */
+    D(0xb9b3, CU42, RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 42)
+
+/* DIVIDE */
+    C(0x1d00, DR, RR_a, Z, r1_D32, r2_32s, new_P, r1_P32, divs32, 0)
+    C(0x5d00, D, RX_a, Z, r1_D32, m2_32s, new_P, r1_P32, divs32, 0)
+    F(0xb30d, DEBR, RRE, Z, e1, e2, new, e1, deb, 0, IF_BFP)
+    F(0xb31d, DDBR, RRE, Z, f1, f2, new, f1, ddb, 0, IF_BFP)
+    F(0xb34d, DXBR, RRE, Z, x2h, x2l, x1, x1, dxb, 0, IF_BFP)
+    F(0xed0d, DEB, RXE, Z, e1, m2_32u, new, e1, deb, 0, IF_BFP)
+    F(0xed1d, DDB, RXE, Z, f1, m2_64, new, f1, ddb, 0, IF_BFP)
+/* DIVIDE LOGICAL */
+    C(0xb997, DLR, RRE, Z, r1_D32, r2_32u, new_P, r1_P32, divu32, 0)
+    C(0xe397, DL, RXY_a, Z, r1_D32, m2_32u, new_P, r1_P32, divu32, 0)
+    C(0xb987, DLGR, RRE, Z, 0, r2_o, r1_P, 0, divu64, 0)
+    C(0xe387, DLG, RXY_a, Z, 0, m2_64, r1_P, 0, divu64, 0)
+/* DIVIDE SINGLE */
+    C(0xb90d, DSGR, RRE, Z, r1p1, r2, r1_P, 0, divs64, 0)
+    C(0xb91d, DSGFR, RRE, Z, r1p1, r2_32s, r1_P, 0, divs64, 0)
+    C(0xe30d, DSG, RXY_a, Z, r1p1, m2_64, r1_P, 0, divs64, 0)
+    C(0xe31d, DSGF, RXY_a, Z, r1p1, m2_32s, r1_P, 0, divs64, 0)
+
+/* EXCLUSIVE OR */
+    C(0x1700, XR, RR_a, Z, r1, r2, new, r1_32, xor, nz32)
+    C(0xb9f7, XRK, RRF_a, DO, r2, r3, new, r1_32, xor, nz32)
+    C(0x5700, X, RX_a, Z, r1, m2_32s, new, r1_32, xor, nz32)
+    C(0xe357, XY, RXY_a, LD, r1, m2_32s, new, r1_32, xor, nz32)
+    C(0xb982, XGR, RRE, Z, r1, r2, r1, 0, xor, nz64)
+    C(0xb9e7, XGRK, RRF_a, DO, r2, r3, r1, 0, xor, nz64)
+    C(0xe382, XG, RXY_a, Z, r1, m2_64, r1, 0, xor, nz64)
+    C(0xd700, XC, SS_a, Z, 0, 0, 0, 0, xc, 0)
+/* EXCLUSIVE OR IMMEDIATE */
+    D(0xc006, XIHF, RIL_a, EI, r1_o, i2_32u, r1, 0, xori, 0, 0x2020)
+    D(0xc007, XILF, RIL_a, EI, r1_o, i2_32u, r1, 0, xori, 0, 0x2000)
+    D(0x9700, XI, SI, Z, la1, i2_8u, new, 0, xi, nz64, MO_UB)
+    D(0xeb57, XIY, SIY, LD, la1, i2_8u, new, 0, xi, nz64, MO_UB)
+
+/* EXECUTE */
+    C(0x4400, EX, RX_a, Z, 0, a2, 0, 0, ex, 0)
+/* EXECUTE RELATIVE LONG */
+    C(0xc600, EXRL, RIL_b, EE, 0, ri2, 0, 0, ex, 0)
+
+/* EXTRACT ACCESS */
+    C(0xb24f, EAR, RRE, Z, 0, 0, new, r1_32, ear, 0)
+/* EXTRACT CPU ATTRIBUTE */
+    C(0xeb4c, ECAG, RSY_a, GIE, 0, a2, r1, 0, ecag, 0)
+/* EXTRACT CPU TIME */
+    F(0xc801, ECTG, SSF, ECT, 0, 0, 0, 0, ectg, 0, IF_IO)
+/* EXTRACT FPC */
+    F(0xb38c, EFPC, RRE, Z, 0, 0, new, r1_32, efpc, 0, IF_BFP)
+/* EXTRACT PSW */
+    C(0xb98d, EPSW, RRE, Z, 0, 0, 0, 0, epsw, 0)
+
+/* FIND LEFTMOST ONE */
+    C(0xb983, FLOGR, RRE, EI, 0, r2_o, r1_P, 0, flogr, 0)
+
+/* INSERT CHARACTER */
+    C(0x4300, IC, RX_a, Z, 0, m2_8u, 0, r1_8, mov2, 0)
+    C(0xe373, ICY, RXY_a, LD, 0, m2_8u, 0, r1_8, mov2, 0)
+/* INSERT CHARACTERS UNDER MASK */
+    D(0xbf00, ICM, RS_b, Z, 0, a2, r1, 0, icm, 0, 0)
+    D(0xeb81, ICMY, RSY_b, LD, 0, a2, r1, 0, icm, 0, 0)
+    D(0xeb80, ICMH, RSY_b, Z, 0, a2, r1, 0, icm, 0, 32)
+/* INSERT IMMEDIATE */
+    D(0xc008, IIHF, RIL_a, EI, r1_o, i2_32u, r1, 0, insi, 0, 0x2020)
+    D(0xc009, IILF, RIL_a, EI, r1_o, i2_32u, r1, 0, insi, 0, 0x2000)
+    D(0xa500, IIHH, RI_a, Z, r1_o, i2_16u, r1, 0, insi, 0, 0x1030)
+    D(0xa501, IIHL, RI_a, Z, r1_o, i2_16u, r1, 0, insi, 0, 0x1020)
+    D(0xa502, IILH, RI_a, Z, r1_o, i2_16u, r1, 0, insi, 0, 0x1010)
+    D(0xa503, IILL, RI_a, Z, r1_o, i2_16u, r1, 0, insi, 0, 0x1000)
+/* INSERT PROGRAM MASK */
+    C(0xb222, IPM, RRE, Z, 0, 0, r1, 0, ipm, 0)
+
+/* LOAD */
+    C(0x1800, LR, RR_a, Z, 0, r2_o, 0, cond_r1r2_32, mov2, 0)
+    C(0x5800, L, RX_a, Z, 0, a2, new, r1_32, ld32s, 0)
+    C(0xe358, LY, RXY_a, LD, 0, a2, new, r1_32, ld32s, 0)
+    C(0xb904, LGR, RRE, Z, 0, r2_o, 0, r1, mov2, 0)
+    C(0xb914, LGFR, RRE, Z, 0, r2_32s, 0, r1, mov2, 0)
+    C(0xe304, LG, RXY_a, Z, 0, a2, r1, 0, ld64, 0)
+    C(0xe314, LGF, RXY_a, Z, 0, a2, r1, 0, ld32s, 0)
+    F(0x2800, LDR, RR_a, Z, 0, f2, 0, f1, mov2, 0, IF_AFP1 | IF_AFP2)
+    F(0x6800, LD, RX_a, Z, 0, m2_64, 0, f1, mov2, 0, IF_AFP1)
+    F(0xed65, LDY, RXY_a, LD, 0, m2_64, 0, f1, mov2, 0, IF_AFP1)
+    F(0x3800, LER, RR_a, Z, 0, e2, 0, cond_e1e2, mov2, 0, IF_AFP1 | IF_AFP2)
+    F(0x7800, LE, RX_a, Z, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1)
+    F(0xed64, LEY, RXY_a, LD, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1)
+    F(0xb365, LXR, RRE, Z, x2h, x2l, 0, x1, movx, 0, IF_AFP1)
+/* LOAD IMMEDIATE */
+    C(0xc001, LGFI, RIL_a, EI, 0, i2, 0, r1, mov2, 0)
+/* LOAD RELATIVE LONG */
+    C(0xc40d, LRL, RIL_b, GIE, 0, ri2, new, r1_32, ld32s, 0)
+    C(0xc408, LGRL, RIL_b, GIE, 0, ri2, r1, 0, ld64, 0)
+    C(0xc40c, LGFRL, RIL_b, GIE, 0, ri2, r1, 0, ld32s, 0)
+/* LOAD ADDRESS */
+    C(0x4100, LA, RX_a, Z, 0, a2, 0, r1, mov2, 0)
+    C(0xe371, LAY, RXY_a, LD, 0, a2, 0, r1, mov2, 0)
+/* LOAD ADDRESS EXTENDED */
+    C(0x5100, LAE, RX_a, Z, 0, a2, 0, r1, mov2e, 0)
+    C(0xe375, LAEY, RXY_a, GIE, 0, a2, 0, r1, mov2e, 0)
+/* LOAD ADDRESS RELATIVE LONG */
+    C(0xc000, LARL, RIL_b, Z, 0, ri2, 0, r1, mov2, 0)
+/* LOAD AND ADD */
+    D(0xebf8, LAA, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, laa, adds32, MO_TESL)
+    D(0xebe8, LAAG, RSY_a, ILA, r3, a2, new, in2_r1, laa, adds64, MO_TEQ)
+/* LOAD AND ADD LOGICAL */
+    D(0xebfa, LAAL, RSY_a, ILA, r3_32u, a2, new, in2_r1_32, laa, addu32, MO_TEUL)
+    D(0xebea, LAALG, RSY_a, ILA, r3, a2, new, in2_r1, laa, addu64, MO_TEQ)
+/* LOAD AND AND */
+    D(0xebf4, LAN, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lan, nz32, MO_TESL)
+    D(0xebe4, LANG, RSY_a, ILA, r3, a2, new, in2_r1, lan, nz64, MO_TEQ)
+/* LOAD AND EXCLUSIVE OR */
+    D(0xebf7, LAX, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lax, nz32, MO_TESL)
+    D(0xebe7, LAXG, RSY_a, ILA, r3, a2, new, in2_r1, lax, nz64, MO_TEQ)
+/* LOAD AND OR */
+    D(0xebf6, LAO, RSY_a, ILA, r3_32s, a2, new, in2_r1_32, lao, nz32, MO_TESL)
+    D(0xebe6, LAOG, RSY_a, ILA, r3, a2, new, in2_r1, lao, nz64, MO_TEQ)
+/* LOAD AND TEST */
+    C(0x1200, LTR, RR_a, Z, 0, r2_o, 0, cond_r1r2_32, mov2, s32)
+    C(0xb902, LTGR, RRE, Z, 0, r2_o, 0, r1, mov2, s64)
+    C(0xb912, LTGFR, RRE, Z, 0, r2_32s, 0, r1, mov2, s64)
+    C(0xe312, LT, RXY_a, EI, 0, a2, new, r1_32, ld32s, s64)
+    C(0xe302, LTG, RXY_a, EI, 0, a2, r1, 0, ld64, s64)
+    C(0xe332, LTGF, RXY_a, GIE, 0, a2, r1, 0, ld32s, s64)
+    F(0xb302, LTEBR, RRE, Z, 0, e2, 0, cond_e1e2, mov2, f32, IF_BFP)
+    F(0xb312, LTDBR, RRE, Z, 0, f2, 0, f1, mov2, f64, IF_BFP)
+    F(0xb342, LTXBR, RRE, Z, x2h, x2l, 0, x1, movx, f128, IF_BFP)
+/* LOAD AND TRAP */
+    C(0xe39f, LAT, RXY_a, LAT, 0, m2_32u, r1, 0, lat, 0)
+    C(0xe385, LGAT, RXY_a, LAT, 0, a2, r1, 0, lgat, 0)
+/* LOAD AND ZERO RIGHTMOST BYTE */
+    C(0xe3eb, LZRF, RXY_a, LZRB, 0, m2_32u, new, r1_32, lzrb, 0)
+    C(0xe32a, LZRG, RXY_a, LZRB, 0, m2_64, r1, 0, lzrb, 0)
+/* LOAD LOGICAL AND ZERO RIGHTMOST BYTE */
+    C(0xe33a, LLZRGF, RXY_a, LZRB, 0, m2_32u, r1, 0, lzrb, 0)
+/* LOAD BYTE */
+    C(0xb926, LBR, RRE, EI, 0, r2_8s, 0, r1_32, mov2, 0)
+    C(0xb906, LGBR, RRE, EI, 0, r2_8s, 0, r1, mov2, 0)
+    C(0xe376, LB, RXY_a, LD, 0, a2, new, r1_32, ld8s, 0)
+    C(0xe377, LGB, RXY_a, LD, 0, a2, r1, 0, ld8s, 0)
+/* LOAD BYTE HIGH */
+    C(0xe3c0, LBH, RXY_a, HW, 0, a2, new, r1_32h, ld8s, 0)
+/* LOAD COMPLEMENT */
+    C(0x1300, LCR, RR_a, Z, 0, r2, new, r1_32, neg, neg32)
+    C(0xb903, LCGR, RRE, Z, 0, r2, r1, 0, neg, neg64)
+    C(0xb913, LCGFR, RRE, Z, 0, r2_32s, r1, 0, neg, neg64)
+    F(0xb303, LCEBR, RRE, Z, 0, e2, new, e1, negf32, f32, IF_BFP)
+    F(0xb313, LCDBR, RRE, Z, 0, f2, new, f1, negf64, f64, IF_BFP)
+    F(0xb343, LCXBR, RRE, Z, x2h, x2l, new_P, x1, negf128, f128, IF_BFP)
+    F(0xb373, LCDFR, RRE, FPSSH, 0, f2, new, f1, negf64, 0, IF_AFP1 | IF_AFP2)
+/* LOAD COUNT TO BLOCK BOUNDARY */
+    C(0xe727, LCBB, RXE, V, la2, 0, r1, 0, lcbb, 0)
+/* LOAD HALFWORD */
+    C(0xb927, LHR, RRE, EI, 0, r2_16s, 0, r1_32, mov2, 0)
+    C(0xb907, LGHR, RRE, EI, 0, r2_16s, 0, r1, mov2, 0)
+    C(0x4800, LH, RX_a, Z, 0, a2, new, r1_32, ld16s, 0)
+    C(0xe378, LHY, RXY_a, LD, 0, a2, new, r1_32, ld16s, 0)
+    C(0xe315, LGH, RXY_a, Z, 0, a2, r1, 0, ld16s, 0)
+/* LOAD HALFWORD HIGH */
+    C(0xe3c4, LHH, RXY_a, HW, 0, a2, new, r1_32h, ld16s, 0)
+/* LOAD HALFWORD IMMEDIATE */
+    C(0xa708, LHI, RI_a, Z, 0, i2, 0, r1_32, mov2, 0)
+    C(0xa709, LGHI, RI_a, Z, 0, i2, 0, r1, mov2, 0)
+/* LOAD HALFWORD RELATIVE LONG */
+    C(0xc405, LHRL, RIL_b, GIE, 0, ri2, new, r1_32, ld16s, 0)
+    C(0xc404, LGHRL, RIL_b, GIE, 0, ri2, r1, 0, ld16s, 0)
+/* LOAD HIGH */
+    C(0xe3ca, LFH, RXY_a, HW, 0, a2, new, r1_32h, ld32u, 0)
+/* LOAD HIGH AND TRAP */
+    C(0xe3c8, LFHAT, RXY_a, LAT, 0, m2_32u, r1, 0, lfhat, 0)
+/* LOAD LOGICAL */
+    C(0xb916, LLGFR, RRE, Z, 0, r2_32u, 0, r1, mov2, 0)
+    C(0xe316, LLGF, RXY_a, Z, 0, a2, r1, 0, ld32u, 0)
+/* LOAD LOGICAL AND TRAP */
+    C(0xe39d, LLGFAT, RXY_a, LAT, 0, a2, r1, 0, llgfat, 0)
+/* LOAD LOGICAL RELATIVE LONG */
+    C(0xc40e, LLGFRL, RIL_b, GIE, 0, ri2, r1, 0, ld32u, 0)
+/* LOAD LOGICAL CHARACTER */
+    C(0xb994, LLCR, RRE, EI, 0, r2_8u, 0, r1_32, mov2, 0)
+    C(0xb984, LLGCR, RRE, EI, 0, r2_8u, 0, r1, mov2, 0)
+    C(0xe394, LLC, RXY_a, EI, 0, a2, new, r1_32, ld8u, 0)
+    C(0xe390, LLGC, RXY_a, Z, 0, a2, r1, 0, ld8u, 0)
+/* LOAD LOGICAL CHARACTER HIGH */
+    C(0xe3c2, LLCH, RXY_a, HW, 0, a2, new, r1_32h, ld8u, 0)
+/* LOAD LOGICAL HALFWORD */
+    C(0xb995, LLHR, RRE, EI, 0, r2_16u, 0, r1_32, mov2, 0)
+    C(0xb985, LLGHR, RRE, EI, 0, r2_16u, 0, r1, mov2, 0)
+    C(0xe395, LLH, RXY_a, EI, 0, a2, new, r1_32, ld16u, 0)
+    C(0xe391, LLGH, RXY_a, Z, 0, a2, r1, 0, ld16u, 0)
+/* LOAD LOGICAL HALFWORD HIGH */
+    C(0xe3c6, LLHH, RXY_a, HW, 0, a2, new, r1_32h, ld16u, 0)
+/* LOAD LOGICAL HALFWORD RELATIVE LONG */
+    C(0xc402, LLHRL, RIL_b, GIE, 0, ri2, new, r1_32, ld16u, 0)
+    C(0xc406, LLGHRL, RIL_b, GIE, 0, ri2, r1, 0, ld16u, 0)
+/* LOAD LOGICAL IMMEDIATE */
+    D(0xc00e, LLIHF, RIL_a, EI, 0, i2_32u_shl, 0, r1, mov2, 0, 32)
+    D(0xc00f, LLILF, RIL_a, EI, 0, i2_32u_shl, 0, r1, mov2, 0, 0)
+    D(0xa50c, LLIHH, RI_a, Z, 0, i2_16u_shl, 0, r1, mov2, 0, 48)
+    D(0xa50d, LLIHL, RI_a, Z, 0, i2_16u_shl, 0, r1, mov2, 0, 32)
+    D(0xa50e, LLILH, RI_a, Z, 0, i2_16u_shl, 0, r1, mov2, 0, 16)
+    D(0xa50f, LLILL, RI_a, Z, 0, i2_16u_shl, 0, r1, mov2, 0, 0)
+/* LOAD LOGICAL THIRTY ONE BITS */
+    C(0xb917, LLGTR, RRE, Z, 0, r2_o, r1, 0, llgt, 0)
+    C(0xe317, LLGT, RXY_a, Z, 0, m2_32u, r1, 0, llgt, 0)
+/* LOAD LOGICAL THIRTY ONE BITS AND TRAP */
+    C(0xe39c, LLGTAT, RXY_a, LAT, 0, m2_32u, r1, 0, llgtat, 0)
+
+/* LOAD FPR FROM GR */
+    F(0xb3c1, LDGR, RRE, FPRGR, 0, r2_o, 0, f1, mov2, 0, IF_AFP1)
+/* LOAD GR FROM FPR */
+    F(0xb3cd, LGDR, RRE, FPRGR, 0, f2, 0, r1, mov2, 0, IF_AFP2)
+/* LOAD NEGATIVE */
+    C(0x1100, LNR, RR_a, Z, 0, r2_32s, new, r1_32, nabs, nabs32)
+    C(0xb901, LNGR, RRE, Z, 0, r2, r1, 0, nabs, nabs64)
+    C(0xb911, LNGFR, RRE, Z, 0, r2_32s, r1, 0, nabs, nabs64)
+    F(0xb301, LNEBR, RRE, Z, 0, e2, new, e1, nabsf32, f32, IF_BFP)
+    F(0xb311, LNDBR, RRE, Z, 0, f2, new, f1, nabsf64, f64, IF_BFP)
+    F(0xb341, LNXBR, RRE, Z, x2h, x2l, new_P, x1, nabsf128, f128, IF_BFP)
+    F(0xb371, LNDFR, RRE, FPSSH, 0, f2, new, f1, nabsf64, 0, IF_AFP1 | IF_AFP2)
+/* LOAD ON CONDITION */
+    C(0xb9f2, LOCR, RRF_c, LOC, r1, r2, new, r1_32, loc, 0)
+    C(0xb9e2, LOCGR, RRF_c, LOC, r1, r2, r1, 0, loc, 0)
+    C(0xebf2, LOC, RSY_b, LOC, r1, m2_32u, new, r1_32, loc, 0)
+    C(0xebe2, LOCG, RSY_b, LOC, r1, m2_64, r1, 0, loc, 0)
+/* LOAD HALFWORD IMMEDIATE ON CONDITION */
+    C(0xec42, LOCHI, RIE_g, LOC2, r1, i2, new, r1_32, loc, 0)
+    C(0xec46, LOCGHI, RIE_g, LOC2, r1, i2, r1, 0, loc, 0)
+    C(0xec4e, LOCHHI, RIE_g, LOC2, r1_sr32, i2, new, r1_32h, loc, 0)
+/* LOAD HIGH ON CONDITION */
+    C(0xb9e0, LOCFHR, RRF_c, LOC2, r1_sr32, r2, new, r1_32h, loc, 0)
+    C(0xebe0, LOCFH, RSY_b, LOC2, r1_sr32, m2_32u, new, r1_32h, loc, 0)
+/* LOAD PAIR DISJOINT */
+    D(0xc804, LPD, SSF, ILA, 0, 0, new_P, r3_P32, lpd, 0, MO_TEUL)
+    D(0xc805, LPDG, SSF, ILA, 0, 0, new_P, r3_P64, lpd, 0, MO_TEQ)
+/* LOAD PAIR FROM QUADWORD */
+    C(0xe38f, LPQ, RXY_a, Z, 0, a2, r1_P, 0, lpq, 0)
+/* LOAD POSITIVE */
+    C(0x1000, LPR, RR_a, Z, 0, r2_32s, new, r1_32, abs, abs32)
+    C(0xb900, LPGR, RRE, Z, 0, r2, r1, 0, abs, abs64)
+    C(0xb910, LPGFR, RRE, Z, 0, r2_32s, r1, 0, abs, abs64)
+    F(0xb300, LPEBR, RRE, Z, 0, e2, new, e1, absf32, f32, IF_BFP)
+    F(0xb310, LPDBR, RRE, Z, 0, f2, new, f1, absf64, f64, IF_BFP)
+    F(0xb340, LPXBR, RRE, Z, x2h, x2l, new_P, x1, absf128, f128, IF_BFP)
+    F(0xb370, LPDFR, RRE, FPSSH, 0, f2, new, f1, absf64, 0, IF_AFP1 | IF_AFP2)
+/* LOAD REVERSED */
+    C(0xb91f, LRVR, RRE, Z, 0, r2_32u, new, r1_32, rev32, 0)
+    C(0xb90f, LRVGR, RRE, Z, 0, r2_o, r1, 0, rev64, 0)
+    C(0xe31f, LRVH, RXY_a, Z, 0, m2_16u, new, r1_16, rev16, 0)
+    C(0xe31e, LRV, RXY_a, Z, 0, m2_32u, new, r1_32, rev32, 0)
+    C(0xe30f, LRVG, RXY_a, Z, 0, m2_64, r1, 0, rev64, 0)
+/* LOAD ZERO */
+    F(0xb374, LZER, RRE, Z, 0, 0, 0, e1, zero, 0, IF_AFP1)
+    F(0xb375, LZDR, RRE, Z, 0, 0, 0, f1, zero, 0, IF_AFP1)
+    F(0xb376, LZXR, RRE, Z, 0, 0, 0, x1, zero2, 0, IF_AFP1)
+
+/* LOAD FPC */
+    F(0xb29d, LFPC, S, Z, 0, m2_32u, 0, 0, sfpc, 0, IF_BFP)
+/* LOAD FPC AND SIGNAL */
+    F(0xb2bd, LFAS, S, IEEEE_SIM, 0, m2_32u, 0, 0, sfas, 0, IF_DFP)
+/* LOAD FP INTEGER */
+    F(0xb357, FIEBR, RRF_e, Z, 0, e2, new, e1, fieb, 0, IF_BFP)
+    F(0xb35f, FIDBR, RRF_e, Z, 0, f2, new, f1, fidb, 0, IF_BFP)
+    F(0xb347, FIXBR, RRF_e, Z, x2h, x2l, new_P, x1, fixb, 0, IF_BFP)
+
+/* LOAD LENGTHENED */
+    F(0xb304, LDEBR, RRE, Z, 0, e2, new, f1, ldeb, 0, IF_BFP)
+    F(0xb305, LXDBR, RRE, Z, 0, f2, new_P, x1, lxdb, 0, IF_BFP)
+    F(0xb306, LXEBR, RRE, Z, 0, e2, new_P, x1, lxeb, 0, IF_BFP)
+    F(0xed04, LDEB, RXE, Z, 0, m2_32u, new, f1, ldeb, 0, IF_BFP)
+    F(0xed05, LXDB, RXE, Z, 0, m2_64, new_P, x1, lxdb, 0, IF_BFP)
+    F(0xed06, LXEB, RXE, Z, 0, m2_32u, new_P, x1, lxeb, 0, IF_BFP)
+    F(0xb324, LDER, RXE, Z, 0, e2, new, f1, lde, 0, IF_AFP1)
+    F(0xed24, LDE, RXE, Z, 0, m2_32u, new, f1, lde, 0, IF_AFP1)
+/* LOAD ROUNDED */
+    F(0xb344, LEDBR, RRF_e, Z, 0, f2, new, e1, ledb, 0, IF_BFP)
+    F(0xb345, LDXBR, RRF_e, Z, x2h, x2l, new, f1, ldxb, 0, IF_BFP)
+    F(0xb346, LEXBR, RRF_e, Z, x2h, x2l, new, e1, lexb, 0, IF_BFP)
+
+/* LOAD MULTIPLE */
+    C(0x9800, LM, RS_a, Z, 0, a2, 0, 0, lm32, 0)
+    C(0xeb98, LMY, RSY_a, LD, 0, a2, 0, 0, lm32, 0)
+    C(0xeb04, LMG, RSY_a, Z, 0, a2, 0, 0, lm64, 0)
+/* LOAD MULTIPLE HIGH */
+    C(0xeb96, LMH, RSY_a, Z, 0, a2, 0, 0, lmh, 0)
+/* LOAD ACCESS MULTIPLE */
+    C(0x9a00, LAM, RS_a, Z, 0, a2, 0, 0, lam, 0)
+    C(0xeb9a, LAMY, RSY_a, LD, 0, a2, 0, 0, lam, 0)
+
+/* MONITOR CALL */
+    C(0xaf00, MC, SI, Z, la1, 0, 0, 0, mc, 0)
+
+/* MOVE */
+    C(0xd200, MVC, SS_a, Z, la1, a2, 0, 0, mvc, 0)
+    C(0xe544, MVHHI, SIL, GIE, la1, i2, 0, m1_16, mov2, 0)
+    C(0xe54c, MVHI, SIL, GIE, la1, i2, 0, m1_32, mov2, 0)
+    C(0xe548, MVGHI, SIL, GIE, la1, i2, 0, m1_64, mov2, 0)
+    C(0x9200, MVI, SI, Z, la1, i2, 0, m1_8, mov2, 0)
+    C(0xeb52, MVIY, SIY, LD, la1, i2, 0, m1_8, mov2, 0)
+/* MOVE INVERSE */
+    C(0xe800, MVCIN, SS_a, Z, la1, a2, 0, 0, mvcin, 0)
+/* MOVE LONG */
+    C(0x0e00, MVCL, RR_a, Z, 0, 0, 0, 0, mvcl, 0)
+/* MOVE LONG EXTENDED */
+    C(0xa800, MVCLE, RS_a, Z, 0, a2, 0, 0, mvcle, 0)
+/* MOVE LONG UNICODE */
+    C(0xeb8e, MVCLU, RSY_a, E2, 0, a2, 0, 0, mvclu, 0)
+/* MOVE NUMERICS */
+    C(0xd100, MVN, SS_a, Z, la1, a2, 0, 0, mvn, 0)
+/* MOVE PAGE */
+    C(0xb254, MVPG, RRE, Z, 0, 0, 0, 0, mvpg, 0)
+/* MOVE STRING */
+    C(0xb255, MVST, RRE, Z, 0, 0, 0, 0, mvst, 0)
+/* MOVE WITH OPTIONAL SPECIFICATION */
+    C(0xc800, MVCOS, SSF, MVCOS, la1, a2, 0, 0, mvcos, 0)
+/* MOVE WITH OFFSET */
+    /* Really format SS_b, but we pack both lengths into one argument
+       for the helper call, so we might as well leave one 8-bit field. */
+    C(0xf100, MVO, SS_a, Z, la1, a2, 0, 0, mvo, 0)
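+
+/*
+ * Illustrative note on the packing mentioned above: SS_b carries two
+ * 4-bit lengths, L1 in instruction bits 8-11 and L2 in bits 12-15,
+ * which together occupy exactly the 8-bit L field of SS_a.  Decoding
+ * as SS_a therefore hands the helper a single byte from which it can
+ * recover l1 = (l >> 4) and l2 = (l & 0xf).
+ */
+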
+/* MOVE ZONES */
+    C(0xd300, MVZ, SS_a, Z, la1, a2, 0, 0, mvz, 0)
+
+/* MULTIPLY */
+    C(0x1c00, MR, RR_a, Z, r1p1_32s, r2_32s, new, r1_D32, mul, 0)
+    C(0xb9ec, MGRK, RRF_a, MIE2,r3_o, r2_o, r1_P, 0, muls128, 0)
+    C(0x5c00, M, RX_a, Z, r1p1_32s, m2_32s, new, r1_D32, mul, 0)
+    C(0xe35c, MFY, RXY_a, GIE, r1p1_32s, m2_32s, new, r1_D32, mul, 0)
+    C(0xe384, MG, RXY_a, MIE2,r1p1_o, m2_64, r1_P, 0, muls128, 0)
+    F(0xb317, MEEBR, RRE, Z, e1, e2, new, e1, meeb, 0, IF_BFP)
+    F(0xb31c, MDBR, RRE, Z, f1, f2, new, f1, mdb, 0, IF_BFP)
+    F(0xb34c, MXBR, RRE, Z, x2h, x2l, x1, x1, mxb, 0, IF_BFP)
+    F(0xb30c, MDEBR, RRE, Z, f1, e2, new, f1, mdeb, 0, IF_BFP)
+    F(0xb307, MXDBR, RRE, Z, 0, f2, x1, x1, mxdb, 0, IF_BFP)
+    F(0xed17, MEEB, RXE, Z, e1, m2_32u, new, e1, meeb, 0, IF_BFP)
+    F(0xed1c, MDB, RXE, Z, f1, m2_64, new, f1, mdb, 0, IF_BFP)
+    F(0xed0c, MDEB, RXE, Z, f1, m2_32u, new, f1, mdeb, 0, IF_BFP)
+    F(0xed07, MXDB, RXE, Z, 0, m2_64, x1, x1, mxdb, 0, IF_BFP)
+/* MULTIPLY HALFWORD */
+    C(0x4c00, MH, RX_a, Z, r1_o, m2_16s, new, r1_32, mul, 0)
+    C(0xe37c, MHY, RXY_a, GIE, r1_o, m2_16s, new, r1_32, mul, 0)
+    C(0xe33c, MGH, RXY_a, MIE2,r1_o, m2_16s, r1, 0, mul, 0)
+/* MULTIPLY HALFWORD IMMEDIATE */
+    C(0xa70c, MHI, RI_a, Z, r1_o, i2, new, r1_32, mul, 0)
+    C(0xa70d, MGHI, RI_a, Z, r1_o, i2, r1, 0, mul, 0)
+/* MULTIPLY LOGICAL */
+    C(0xb996, MLR, RRE, Z, r1p1_32u, r2_32u, new, r1_D32, mul, 0)
+    C(0xe396, ML, RXY_a, Z, r1p1_32u, m2_32u, new, r1_D32, mul, 0)
+    C(0xb986, MLGR, RRE, Z, r1p1, r2_o, r1_P, 0, mul128, 0)
+    C(0xe386, MLG, RXY_a, Z, r1p1, m2_64, r1_P, 0, mul128, 0)
+/* MULTIPLY SINGLE */
+    C(0xb252, MSR, RRE, Z, r1_o, r2_o, new, r1_32, mul, 0)
+    C(0xb9fd, MSRKC, RRF_a, MIE2,r3_32s, r2_32s, new, r1_32, mul, muls32)
+    C(0x7100, MS, RX_a, Z, r1_o, m2_32s, new, r1_32, mul, 0)
+    C(0xe351, MSY, RXY_a, LD, r1_o, m2_32s, new, r1_32, mul, 0)
+    C(0xe353, MSC, RXY_a, MIE2,r1_32s, m2_32s, new, r1_32, mul, muls32)
+    C(0xb90c, MSGR, RRE, Z, r1_o, r2_o, r1, 0, mul, 0)
+    C(0xb9ed, MSGRKC, RRF_a, MIE2,r3_o, r2_o, new_P, out2_r1, muls128, muls64)
+    C(0xb91c, MSGFR, RRE, Z, r1_o, r2_32s, r1, 0, mul, 0)
+    C(0xe30c, MSG, RXY_a, Z, r1_o, m2_64, r1, 0, mul, 0)
+    C(0xe383, MSGC, RXY_a, MIE2,r1_o, m2_64, new_P, out2_r1, muls128, muls64)
+    C(0xe31c, MSGF, RXY_a, Z, r1_o, m2_32s, r1, 0, mul, 0)
+/* MULTIPLY SINGLE IMMEDIATE */
+    C(0xc201, MSFI, RIL_a, GIE, r1_o, i2, new, r1_32, mul, 0)
+    C(0xc200, MSGFI, RIL_a, GIE, r1_o, i2, r1, 0, mul, 0)
+
+/* MULTIPLY AND ADD */
+    F(0xb30e, MAEBR, RRD, Z, e1, e2, new, e1, maeb, 0, IF_BFP)
+    F(0xb31e, MADBR, RRD, Z, f1, f2, new, f1, madb, 0, IF_BFP)
+    F(0xed0e, MAEB, RXF, Z, e1, m2_32u, new, e1, maeb, 0, IF_BFP)
+    F(0xed1e, MADB, RXF, Z, f1, m2_64, new, f1, madb, 0, IF_BFP)
+/* MULTIPLY AND SUBTRACT */
+    F(0xb30f, MSEBR, RRD, Z, e1, e2, new, e1, mseb, 0, IF_BFP)
+    F(0xb31f, MSDBR, RRD, Z, f1, f2, new, f1, msdb, 0, IF_BFP)
+    F(0xed0f, MSEB, RXF, Z, e1, m2_32u, new, e1, mseb, 0, IF_BFP)
+    F(0xed1f, MSDB, RXF, Z, f1, m2_64, new, f1, msdb, 0, IF_BFP)
+
+/* OR */
+    C(0x1600, OR, RR_a, Z, r1, r2, new, r1_32, or, nz32)
+    C(0xb9f6, ORK, RRF_a, DO, r2, r3, new, r1_32, or, nz32)
+    C(0x5600, O, RX_a, Z, r1, m2_32s, new, r1_32, or, nz32)
+    C(0xe356, OY, RXY_a, LD, r1, m2_32s, new, r1_32, or, nz32)
+    C(0xb981, OGR, RRE, Z, r1, r2, r1, 0, or, nz64)
+    C(0xb9e6, OGRK, RRF_a, DO, r2, r3, r1, 0, or, nz64)
+    C(0xe381, OG, RXY_a, Z, r1, m2_64, r1, 0, or, nz64)
+    C(0xd600, OC, SS_a, Z, la1, a2, 0, 0, oc, 0)
+/* OR IMMEDIATE */
+    D(0xc00c, OIHF, RIL_a, EI, r1_o, i2_32u, r1, 0, ori, 0, 0x2020)
+    D(0xc00d, OILF, RIL_a, EI, r1_o, i2_32u, r1, 0, ori, 0, 0x2000)
+    D(0xa508, OIHH, RI_a, Z, r1_o, i2_16u, r1, 0, ori, 0, 0x1030)
+    D(0xa509, OIHL, RI_a, Z, r1_o, i2_16u, r1, 0, ori, 0, 0x1020)
+    D(0xa50a, OILH, RI_a, Z, r1_o, i2_16u, r1, 0, ori, 0, 0x1010)
+    D(0xa50b, OILL, RI_a, Z, r1_o, i2_16u, r1, 0, ori, 0, 0x1000)
+    D(0x9600, OI, SI, Z, la1, i2_8u, new, 0, oi, nz64, MO_UB)
+    D(0xeb56, OIY, SIY, LD, la1, i2_8u, new, 0, oi, nz64, MO_UB)
+
+/* PACK */
+    /* Really format SS_b, but we pack both lengths into one argument
+       for the helper call, so we might as well leave one 8-bit field. */
+    C(0xf200, PACK, SS_a, Z, la1, a2, 0, 0, pack, 0)
+/* PACK ASCII */
+    C(0xe900, PKA, SS_f, E2, la1, a2, 0, 0, pka, 0)
+/* PACK UNICODE */
+    C(0xe100, PKU, SS_f, E2, la1, a2, 0, 0, pku, 0)
+
+/* PREFETCH */
+    /* Implemented as nops of course. */
+    C(0xe336, PFD, RXY_b, GIE, 0, 0, 0, 0, 0, 0)
+    C(0xc602, PFDRL, RIL_c, GIE, 0, 0, 0, 0, 0, 0)
+/* PERFORM PROCESSOR ASSIST */
+    /* Implemented as nop of course. */
+    C(0xb2e8, PPA, RRF_c, PPA, 0, 0, 0, 0, 0, 0)
+
+/* POPULATION COUNT */
+    C(0xb9e1, POPCNT, RRE, PC, 0, r2_o, r1, 0, popcnt, nz64)
+
+/* ROTATE LEFT SINGLE LOGICAL */
+    C(0xeb1d, RLL, RSY_a, Z, r3_o, sh32, new, r1_32, rll32, 0)
+    C(0xeb1c, RLLG, RSY_a, Z, r3_o, sh64, r1, 0, rll64, 0)
+
+/* ROTATE THEN INSERT SELECTED BITS */
+    C(0xec55, RISBG, RIE_f, GIE, 0, r2, r1, 0, risbg, s64)
+    C(0xec59, RISBGN, RIE_f, MIE, 0, r2, r1, 0, risbg, 0)
+    C(0xec5d, RISBHG, RIE_f, HW, 0, r2, r1, 0, risbg, 0)
+    C(0xec51, RISBLG, RIE_f, HW, 0, r2, r1, 0, risbg, 0)
+/* ROTATE THEN <OP> SELECTED BITS */
+    C(0xec54, RNSBG, RIE_f, GIE, 0, r2, r1, 0, rosbg, 0)
+    C(0xec56, ROSBG, RIE_f, GIE, 0, r2, r1, 0, rosbg, 0)
+    C(0xec57, RXSBG, RIE_f, GIE, 0, r2, r1, 0, rosbg, 0)
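+
+/*
+ * Rough orientation for the RISBG family (per the z/Architecture
+ * definition, not spelled out by the table): the second operand is
+ * first rotated left by the I5 amount, then bit positions I3 through
+ * I4 of the rotated value are inserted into r1; if the zero flag
+ * (0x80 in the I4 field) is set, the r1 bits outside the selected
+ * range are zeroed rather than preserved.
+ */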
+
+/* SEARCH STRING */
+    C(0xb25e, SRST, RRE, Z, 0, 0, 0, 0, srst, 0)
+/* SEARCH STRING UNICODE */
+    C(0xb9be, SRSTU, RRE, ETF3, 0, 0, 0, 0, srstu, 0)
+
+/* SET ACCESS */
+    C(0xb24e, SAR, RRE, Z, 0, r2_o, 0, 0, sar, 0)
+/* SET ADDRESSING MODE */
+    D(0x010c, SAM24, E, Z, 0, 0, 0, 0, sam, 0, 0)
+    D(0x010d, SAM31, E, Z, 0, 0, 0, 0, sam, 0, 1)
+    D(0x010e, SAM64, E, Z, 0, 0, 0, 0, sam, 0, 3)
+/* SET FPC */
+    F(0xb384, SFPC, RRE, Z, 0, r1_o, 0, 0, sfpc, 0, IF_BFP)
+/* SET FPC AND SIGNAL */
+    F(0xb385, SFASR, RRE, IEEEE_SIM, 0, r1_o, 0, 0, sfas, 0, IF_DFP)
+/* SET BFP ROUNDING MODE */
+    F(0xb299, SRNM, S, Z, la2, 0, 0, 0, srnm, 0, IF_BFP)
+    F(0xb2b8, SRNMB, S, FPE, la2, 0, 0, 0, srnmb, 0, IF_BFP)
+/* SET DFP ROUNDING MODE */
+    F(0xb2b9, SRNMT, S, DFPR, la2, 0, 0, 0, srnmt, 0, IF_DFP)
+/* SET PROGRAM MASK */
+    C(0x0400, SPM, RR_a, Z, r1, 0, 0, 0, spm, 0)
+
+/* SHIFT LEFT SINGLE */
+    D(0x8b00, SLA, RS_a, Z, r1, sh32, new, r1_32, sla, 0, 31)
+    D(0xebdd, SLAK, RSY_a, DO, r3, sh32, new, r1_32, sla, 0, 31)
+    D(0xeb0b, SLAG, RSY_a, Z, r3, sh64, r1, 0, sla, 0, 63)
+/* SHIFT LEFT SINGLE LOGICAL */
+    C(0x8900, SLL, RS_a, Z, r1_o, sh32, new, r1_32, sll, 0)
+    C(0xebdf, SLLK, RSY_a, DO, r3_o, sh32, new, r1_32, sll, 0)
+    C(0xeb0d, SLLG, RSY_a, Z, r3_o, sh64, r1, 0, sll, 0)
+/* SHIFT RIGHT SINGLE */
+    C(0x8a00, SRA, RS_a, Z, r1_32s, sh32, new, r1_32, sra, s32)
+    C(0xebdc, SRAK, RSY_a, DO, r3_32s, sh32, new, r1_32, sra, s32)
+    C(0xeb0a, SRAG, RSY_a, Z, r3_o, sh64, r1, 0, sra, s64)
+/* SHIFT RIGHT SINGLE LOGICAL */
+    C(0x8800, SRL, RS_a, Z, r1_32u, sh32, new, r1_32, srl, 0)
+    C(0xebde, SRLK, RSY_a, DO, r3_32u, sh32, new, r1_32, srl, 0)
+    C(0xeb0c, SRLG, RSY_a, Z, r3_o, sh64, r1, 0, srl, 0)
+/* SHIFT LEFT DOUBLE */
+    D(0x8f00, SLDA, RS_a, Z, r1_D32, sh64, new, r1_D32, sla, 0, 31)
+/* SHIFT LEFT DOUBLE LOGICAL */
+    C(0x8d00, SLDL, RS_a, Z, r1_D32, sh64, new, r1_D32, sll, 0)
+/* SHIFT RIGHT DOUBLE */
+    C(0x8e00, SRDA, RS_a, Z, r1_D32, sh64, new, r1_D32, sra, s64)
+/* SHIFT RIGHT DOUBLE LOGICAL */
+    C(0x8c00, SRDL, RS_a, Z, r1_D32, sh64, new, r1_D32, srl, 0)
+
+/* SQUARE ROOT */
+    F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP)
+    F(0xb315, SQDBR, RRE, Z, 0, f2, new, f1, sqdb, 0, IF_BFP)
+    F(0xb316, SQXBR, RRE, Z, x2h, x2l, new_P, x1, sqxb, 0, IF_BFP)
+    F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP)
+    F(0xed15, SQDB, RXE, Z, 0, m2_64, new, f1, sqdb, 0, IF_BFP)
+
+/* STORE */
+    C(0x5000, ST, RX_a, Z, r1_o, a2, 0, 0, st32, 0)
+    C(0xe350, STY, RXY_a, LD, r1_o, a2, 0, 0, st32, 0)
+    C(0xe324, STG, RXY_a, Z, r1_o, a2, 0, 0, st64, 0)
+    F(0x6000, STD, RX_a, Z, f1, a2, 0, 0, st64, 0, IF_AFP1)
+    F(0xed67, STDY, RXY_a, LD, f1, a2, 0, 0, st64, 0, IF_AFP1)
+    F(0x7000, STE, RX_a, Z, e1, a2, 0, 0, st32, 0, IF_AFP1)
+    F(0xed66, STEY, RXY_a, LD, e1, a2, 0, 0, st32, 0, IF_AFP1)
+/* STORE RELATIVE LONG */
+    C(0xc40f, STRL, RIL_b, GIE, r1_o, ri2, 0, 0, st32, 0)
+    C(0xc40b, STGRL, RIL_b, GIE, r1_o, ri2, 0, 0, st64, 0)
+/* STORE CHARACTER */
+    C(0x4200, STC, RX_a, Z, r1_o, a2, 0, 0, st8, 0)
+    C(0xe372, STCY, RXY_a, LD, r1_o, a2, 0, 0, st8, 0)
+/* STORE CHARACTER HIGH */
+    C(0xe3c3, STCH, RXY_a, HW, r1_sr32, a2, 0, 0, st8, 0)
+/* STORE CHARACTERS UNDER MASK */
+    D(0xbe00, STCM, RS_b, Z, r1_o, a2, 0, 0, stcm, 0, 0)
+    D(0xeb2d, STCMY, RSY_b, LD, r1_o, a2, 0, 0, stcm, 0, 0)
+    D(0xeb2c, STCMH, RSY_b, Z, r1_o, a2, 0, 0, stcm, 0, 32)
+/* STORE HALFWORD */
+    C(0x4000, STH, RX_a, Z, r1_o, a2, 0, 0, st16, 0)
+    C(0xe370, STHY, RXY_a, LD, r1_o, a2, 0, 0, st16, 0)
+/* STORE HALFWORD HIGH */
+    C(0xe3c7, STHH, RXY_a, HW, r1_sr32, a2, 0, 0, st16, 0)
+/* STORE HALFWORD RELATIVE LONG */
+    C(0xc407, STHRL, RIL_b, GIE, r1_o, ri2, 0, 0, st16, 0)
+/* STORE HIGH */
+    C(0xe3cb, STFH, RXY_a, HW, r1_sr32, a2, 0, 0, st32, 0)
+/* STORE ON CONDITION */
+    D(0xebf3, STOC, RSY_b, LOC, 0, 0, 0, 0, soc, 0, 0)
+    D(0xebe3, STOCG, RSY_b, LOC, 0, 0, 0, 0, soc, 0, 1)
+/* STORE HIGH ON CONDITION */
+    D(0xebe1, STOCFH, RSY_b, LOC2, 0, 0, 0, 0, soc, 0, 2)
+/* STORE REVERSED */
+    C(0xe33f, STRVH, RXY_a, Z, la2, r1_16u, new, m1_16, rev16, 0)
+    C(0xe33e, STRV, RXY_a, Z, la2, r1_32u, new, m1_32, rev32, 0)
+    C(0xe32f, STRVG, RXY_a, Z, la2, r1_o, new, m1_64, rev64, 0)
+
+/* STORE CLOCK */
+    F(0xb205, STCK, S, Z, la2, 0, new, m1_64, stck, 0, IF_IO)
+    F(0xb27c, STCKF, S, SCF, la2, 0, new, m1_64, stck, 0, IF_IO)
+/* STORE CLOCK EXTENDED */
+    F(0xb278, STCKE, S, Z, 0, a2, 0, 0, stcke, 0, IF_IO)
+
+/* STORE FACILITY LIST EXTENDED */
+    C(0xb2b0, STFLE, S, SFLE, 0, a2, 0, 0, stfle, 0)
+/* STORE FPC */
+    F(0xb29c, STFPC, S, Z, 0, a2, new, m2_32, efpc, 0, IF_BFP)
+
+/* STORE MULTIPLE */
+    D(0x9000, STM, RS_a, Z, 0, a2, 0, 0, stm, 0, 4)
+    D(0xeb90, STMY, RSY_a, LD, 0, a2, 0, 0, stm, 0, 4)
+    D(0xeb24, STMG, RSY_a, Z, 0, a2, 0, 0, stm, 0, 8)
+/* STORE MULTIPLE HIGH */
+    C(0xeb26, STMH, RSY_a, Z, 0, a2, 0, 0, stmh, 0)
+/* STORE ACCESS MULTIPLE */
+    C(0x9b00, STAM, RS_a, Z, 0, a2, 0, 0, stam, 0)
+    C(0xeb9b, STAMY, RSY_a, LD, 0, a2, 0, 0, stam, 0)
+/* STORE PAIR TO QUADWORD */
+    C(0xe38e, STPQ, RXY_a, Z, 0, a2, r1_P, 0, stpq, 0)
+
+/* SUBTRACT */
+    C(0x1b00, SR, RR_a, Z, r1, r2, new, r1_32, sub, subs32)
+    C(0xb9f9, SRK, RRF_a, DO, r2, r3, new, r1_32, sub, subs32)
+    C(0x5b00, S, RX_a, Z, r1, m2_32s, new, r1_32, sub, subs32)
+    C(0xe35b, SY, RXY_a, LD, r1, m2_32s, new, r1_32, sub, subs32)
+    C(0xb909, SGR, RRE, Z, r1, r2, r1, 0, sub, subs64)
+    C(0xb919, SGFR, RRE, Z, r1, r2_32s, r1, 0, sub, subs64)
+    C(0xb9e9, SGRK, RRF_a, DO, r2, r3, r1, 0, sub, subs64)
+    C(0xe309, SG, RXY_a, Z, r1, m2_64, r1, 0, sub, subs64)
+    C(0xe319, SGF, RXY_a, Z, r1, m2_32s, r1, 0, sub, subs64)
+    F(0xb30b, SEBR, RRE, Z, e1, e2, new, e1, seb, f32, IF_BFP)
+    F(0xb31b, SDBR, RRE, Z, f1, f2, new, f1, sdb, f64, IF_BFP)
+    F(0xb34b, SXBR, RRE, Z, x2h, x2l, x1, x1, sxb, f128, IF_BFP)
+    F(0xed0b, SEB, RXE, Z, e1, m2_32u, new, e1, seb, f32, IF_BFP)
+    F(0xed1b, SDB, RXE, Z, f1, m2_64, new, f1, sdb, f64, IF_BFP)
+/* SUBTRACT HALFWORD */
+    C(0x4b00, SH, RX_a, Z, r1, m2_16s, new, r1_32, sub, subs32)
+    C(0xe37b, SHY, RXY_a, LD, r1, m2_16s, new, r1_32, sub, subs32)
+    C(0xe339, SGH, RXY_a, MIE2,r1, m2_16s, r1, 0, sub, subs64)
+/* SUBTRACT HIGH */
+    C(0xb9c9, SHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, sub, subs32)
+    C(0xb9d9, SHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, sub, subs32)
+/* SUBTRACT LOGICAL */
+    C(0x1f00, SLR, RR_a, Z, r1_32u, r2_32u, new, r1_32, sub, subu32)
+    C(0xb9fb, SLRK, RRF_a, DO, r2_32u, r3_32u, new, r1_32, sub, subu32)
+    C(0x5f00, SL, RX_a, Z, r1_32u, m2_32u, new, r1_32, sub, subu32)
+    C(0xe35f, SLY, RXY_a, LD, r1_32u, m2_32u, new, r1_32, sub, subu32)
+    C(0xb90b, SLGR, RRE, Z, r1, r2, r1, 0, subu64, subu64)
+    C(0xb91b, SLGFR, RRE, Z, r1, r2_32u, r1, 0, subu64, subu64)
+    C(0xb9eb, SLGRK, RRF_a, DO, r2, r3, r1, 0, subu64, subu64)
+    C(0xe30b, SLG, RXY_a, Z, r1, m2_64, r1, 0, subu64, subu64)
+    C(0xe31b, SLGF, RXY_a, Z, r1, m2_32u, r1, 0, subu64, subu64)
+/* SUBTRACT LOGICAL HIGH */
+    C(0xb9cb, SLHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, sub, subu32)
+    C(0xb9db, SLHHLR, RRF_a, HW, r2_sr32, r3_32u, new, r1_32h, sub, subu32)
+/* SUBTRACT LOGICAL IMMEDIATE */
+    C(0xc205, SLFI, RIL_a, EI, r1_32u, i2_32u, new, r1_32, sub, subu32)
+    C(0xc204, SLGFI, RIL_a, EI, r1, i2_32u, r1, 0, subu64, subu64)
+/* SUBTRACT LOGICAL WITH BORROW */
+    C(0xb999, SLBR, RRE, Z, r1_32u, r2_32u, new, r1_32, subb32, subu32)
+    C(0xb989, SLBGR, RRE, Z, r1, r2, r1, 0, subb64, subu64)
+    C(0xe399, SLB, RXY_a, Z, r1_32u, m2_32u, new, r1_32, subb32, subu32)
+    C(0xe389, SLBG, RXY_a, Z, r1, m2_64, r1, 0, subb64, subu64)
+
+/* SUPERVISOR CALL */
+    C(0x0a00, SVC, I, Z, 0, 0, 0, 0, svc, 0)
+
+/* TEST ADDRESSING MODE */
+    C(0x010b, TAM, E, Z, 0, 0, 0, 0, tam, 0)
+
+/* TEST AND SET */
+    C(0x9300, TS, S, Z, 0, a2, 0, 0, ts, 0)
+
+/* TEST DATA CLASS */
+    F(0xed10, TCEB, RXE, Z, e1, a2, 0, 0, tceb, 0, IF_BFP)
+    F(0xed11, TCDB, RXE, Z, f1, a2, 0, 0, tcdb, 0, IF_BFP)
+    F(0xed12, TCXB, RXE, Z, 0, a2, x1, 0, tcxb, 0, IF_BFP)
+
+/* TEST DECIMAL */
+    C(0xebc0, TP, RSL, E2, la1, 0, 0, 0, tp, 0)
+
+/* TEST UNDER MASK */
+    C(0x9100, TM, SI, Z, m1_8u, i2_8u, 0, 0, 0, tm32)
+    C(0xeb51, TMY, SIY, LD, m1_8u, i2_8u, 0, 0, 0, tm32)
+    D(0xa702, TMHH, RI_a, Z, r1_o, i2_16u_shl, 0, 0, 0, tm64, 48)
+    D(0xa703, TMHL, RI_a, Z, r1_o, i2_16u_shl, 0, 0, 0, tm64, 32)
+    D(0xa700, TMLH, RI_a, Z, r1_o, i2_16u_shl, 0, 0, 0, tm64, 16)
+    D(0xa701, TMLL, RI_a, Z, r1_o, i2_16u_shl, 0, 0, 0, tm64, 0)
+
+/* TRANSLATE */
+    C(0xdc00, TR, SS_a, Z, la1, a2, 0, 0, tr, 0)
+/* TRANSLATE AND TEST */
+    C(0xdd00, TRT, SS_a, Z, la1, a2, 0, 0, trt, 0)
+/* TRANSLATE AND TEST REVERSE */
+    C(0xd000, TRTR, SS_a, ETF3, la1, a2, 0, 0, trtr, 0)
+/* TRANSLATE EXTENDED */
+    C(0xb2a5, TRE, RRE, Z, 0, r2, r1_P, 0, tre, 0)
+
+/* TRANSLATE ONE TO ONE */
+    C(0xb993, TROO, RRF_c, E2, 0, 0, 0, 0, trXX, 0)
+/* TRANSLATE ONE TO TWO */
+    C(0xb992, TROT, RRF_c, E2, 0, 0, 0, 0, trXX, 0)
+/* TRANSLATE TWO TO ONE */
+    C(0xb991, TRTO, RRF_c, E2, 0, 0, 0, 0, trXX, 0)
+/* TRANSLATE TWO TO TWO */
+    C(0xb990, TRTT, RRF_c, E2, 0, 0, 0, 0, trXX, 0)
+
+/* UNPACK */
+    /* Really format SS_b, but we pack both lengths into one argument
+       for the helper call, so we might as well leave one 8-bit field. */
+    C(0xf300, UNPK, SS_a, Z, la1, a2, 0, 0, unpk, 0)
+/* UNPACK ASCII */
+    C(0xea00, UNPKA, SS_a, E2, la1, a2, 0, 0, unpka, 0)
+/* UNPACK UNICODE */
+    C(0xe200, UNPKU, SS_a, E2, la1, a2, 0, 0, unpku, 0)
+
+/* MSA Instructions */
+    D(0xb91e, KMAC, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KMAC)
+    D(0xb928, PCKMO, RRE, MSA3, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_PCKMO)
+    D(0xb92a, KMF, RRE, MSA4, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KMF)
+    D(0xb92b, KMO, RRE, MSA4, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KMO)
+    D(0xb92c, PCC, RRE, MSA4, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_PCC)
+    D(0xb92d, KMCTR, RRF_b, MSA4, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KMCTR)
+    D(0xb92e, KM, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KM)
+    D(0xb92f, KMC, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KMC)
+    D(0xb929, KMA, RRF_b, MSA8, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KMA)
+    D(0xb93c, PPNO, RRE, MSA5, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_PPNO)
+    D(0xb93e, KIMD, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KIMD)
+    D(0xb93f, KLMD, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KLMD)
+
+/* === Vector Support Instructions === */
+
+/* VECTOR BIT PERMUTE */
+    E(0xe785, VBPERM, VRR_c, VE, 0, 0, 0, 0, vbperm, 0, 0, IF_VEC)
+/* VECTOR GATHER ELEMENT */
+    E(0xe713, VGEF, VRV, V, la2, 0, 0, 0, vge, 0, ES_32, IF_VEC)
+    E(0xe712, VGEG, VRV, V, la2, 0, 0, 0, vge, 0, ES_64, IF_VEC)
+/* VECTOR GENERATE BYTE MASK */
+    F(0xe744, VGBM, VRI_a, V, 0, 0, 0, 0, vgbm, 0, IF_VEC)
+/* VECTOR GENERATE MASK */
+    F(0xe746, VGM, VRI_b, V, 0, 0, 0, 0, vgm, 0, IF_VEC)
+/* VECTOR LOAD */
+    F(0xe706, VL, VRX, V, la2, 0, 0, 0, vl, 0, IF_VEC)
+    F(0xe756, VLR, VRR_a, V, 0, 0, 0, 0, vlr, 0, IF_VEC)
+/* VECTOR LOAD AND REPLICATE */
+    F(0xe705, VLREP, VRX, V, la2, 0, 0, 0, vlrep, 0, IF_VEC)
+/* VECTOR LOAD ELEMENT */
+    E(0xe700, VLEB, VRX, V, la2, 0, 0, 0, vle, 0, ES_8, IF_VEC)
+    E(0xe701, VLEH, VRX, V, la2, 0, 0, 0, vle, 0, ES_16, IF_VEC)
+    E(0xe703, VLEF, VRX, V, la2, 0, 0, 0, vle, 0, ES_32, IF_VEC)
+    E(0xe702, VLEG, VRX, V, la2, 0, 0, 0, vle, 0, ES_64, IF_VEC)
+/* VECTOR LOAD ELEMENT IMMEDIATE */
+    E(0xe740, VLEIB, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_8, IF_VEC)
+    E(0xe741, VLEIH, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_16, IF_VEC)
+    E(0xe743, VLEIF, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_32, IF_VEC)
+    E(0xe742, VLEIG, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_64, IF_VEC)
+/* VECTOR LOAD GR FROM VR ELEMENT */
+    F(0xe721, VLGV, VRS_c, V, la2, 0, r1, 0, vlgv, 0, IF_VEC)
+/* VECTOR LOAD LOGICAL ELEMENT AND ZERO */
+    F(0xe704, VLLEZ, VRX, V, la2, 0, 0, 0, vllez, 0, IF_VEC)
+/* VECTOR LOAD MULTIPLE */
+    F(0xe736, VLM, VRS_a, V, la2, 0, 0, 0, vlm, 0, IF_VEC)
+/* VECTOR LOAD TO BLOCK BOUNDARY */
+    F(0xe707, VLBB, VRX, V, la2, 0, 0, 0, vlbb, 0, IF_VEC)
+/* VECTOR LOAD VR ELEMENT FROM GR */
+    F(0xe722, VLVG, VRS_b, V, la2, r3, 0, 0, vlvg, 0, IF_VEC)
+/* VECTOR LOAD VR FROM GRS DISJOINT */
+    F(0xe762, VLVGP, VRR_f, V, r2, r3, 0, 0, vlvgp, 0, IF_VEC)
+/* VECTOR LOAD WITH LENGTH */
+    F(0xe737, VLL, VRS_b, V, la2, r3_32u, 0, 0, vll, 0, IF_VEC)
+/* VECTOR MERGE HIGH */
+    F(0xe761, VMRH, VRR_c, V, 0, 0, 0, 0, vmr, 0, IF_VEC)
+/* VECTOR MERGE LOW */
+    F(0xe760, VMRL, VRR_c, V, 0, 0, 0, 0, vmr, 0, IF_VEC)
+/* VECTOR PACK */
+    F(0xe794, VPK, VRR_c, V, 0, 0, 0, 0, vpk, 0, IF_VEC)
+/* VECTOR PACK SATURATE */
+    F(0xe797, VPKS, VRR_b, V, 0, 0, 0, 0, vpk, 0, IF_VEC)
+/* VECTOR PACK LOGICAL SATURATE */
+    F(0xe795, VPKLS, VRR_b, V, 0, 0, 0, 0, vpk, 0, IF_VEC)
+/* VECTOR PERMUTE */
+    F(0xe78c, VPERM, VRR_e, V, 0, 0, 0, 0, vperm, 0, IF_VEC)
+/* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */
+    F(0xe784, VPDI, VRR_c, V, 0, 0, 0, 0, vpdi, 0, IF_VEC)
+/* VECTOR REPLICATE */
+    F(0xe74d, VREP, VRI_c, V, 0, 0, 0, 0, vrep, 0, IF_VEC)
+/* VECTOR REPLICATE IMMEDIATE */
+    F(0xe745, VREPI, VRI_a, V, 0, 0, 0, 0, vrepi, 0, IF_VEC)
+/* VECTOR SCATTER ELEMENT */
+    E(0xe71b, VSCEF, VRV, V, la2, 0, 0, 0, vsce, 0, ES_32, IF_VEC)
+    E(0xe71a, VSCEG, VRV, V, la2, 0, 0, 0, vsce, 0, ES_64, IF_VEC)
+/* VECTOR SELECT */
+    F(0xe78d, VSEL, VRR_e, V, 0, 0, 0, 0, vsel, 0, IF_VEC)
+/* VECTOR SIGN EXTEND TO DOUBLEWORD */
+    F(0xe75f, VSEG, VRR_a, V, 0, 0, 0, 0, vseg, 0, IF_VEC)
+/* VECTOR STORE */
+    F(0xe70e, VST, VRX, V, la2, 0, 0, 0, vst, 0, IF_VEC)
+/* VECTOR STORE ELEMENT */
+    E(0xe708, VSTEB, VRX, V, la2, 0, 0, 0, vste, 0, ES_8, IF_VEC)
+    E(0xe709, VSTEH, VRX, V, la2, 0, 0, 0, vste, 0, ES_16, IF_VEC)
+    E(0xe70b, VSTEF, VRX, V, la2, 0, 0, 0, vste, 0, ES_32, IF_VEC)
+    E(0xe70a, VSTEG, VRX, V, la2, 0, 0, 0, vste, 0, ES_64, IF_VEC)
+/* VECTOR STORE MULTIPLE */
+    F(0xe73e, VSTM, VRS_a, V, la2, 0, 0, 0, vstm, 0, IF_VEC)
+/* VECTOR STORE WITH LENGTH */
+    F(0xe73f, VSTL, VRS_b, V, la2, r3_32u, 0, 0, vstl, 0, IF_VEC)
+/* VECTOR UNPACK HIGH */
+    F(0xe7d7, VUPH, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)
+/* VECTOR UNPACK LOGICAL HIGH */
+    F(0xe7d5, VUPLH, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)
+/* VECTOR UNPACK LOW */
+    F(0xe7d6, VUPL, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)
+/* VECTOR UNPACK LOGICAL LOW */
+    F(0xe7d4, VUPLL, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)
+
+/* === Vector Integer Instructions === */
+
+/* VECTOR ADD */
+    F(0xe7f3, VA, VRR_c, V, 0, 0, 0, 0, va, 0, IF_VEC)
+/* VECTOR ADD COMPUTE CARRY */
+    F(0xe7f1, VACC, VRR_c, V, 0, 0, 0, 0, vacc, 0, IF_VEC)
+/* VECTOR ADD WITH CARRY */
+    F(0xe7bb, VAC, VRR_d, V, 0, 0, 0, 0, vac, 0, IF_VEC)
+/* VECTOR ADD WITH CARRY COMPUTE CARRY */
+    F(0xe7b9, VACCC, VRR_d, V, 0, 0, 0, 0, vaccc, 0, IF_VEC)
+/* VECTOR AND */
+    F(0xe768, VN, VRR_c, V, 0, 0, 0, 0, vn, 0, IF_VEC)
+/* VECTOR AND WITH COMPLEMENT */
+    F(0xe769, VNC, VRR_c, V, 0, 0, 0, 0, vnc, 0, IF_VEC)
+/* VECTOR AVERAGE */
+    F(0xe7f2, VAVG, VRR_c, V, 0, 0, 0, 0, vavg, 0, IF_VEC)
+/* VECTOR AVERAGE LOGICAL */
+    F(0xe7f0, VAVGL, VRR_c, V, 0, 0, 0, 0, vavgl, 0, IF_VEC)
+/* VECTOR CHECKSUM */
+    F(0xe766, VCKSM, VRR_c, V, 0, 0, 0, 0, vcksm, 0, IF_VEC)
+/* VECTOR ELEMENT COMPARE */
+    F(0xe7db, VEC, VRR_a, V, 0, 0, 0, 0, vec, cmps64, IF_VEC)
+/* VECTOR ELEMENT COMPARE LOGICAL */
+    F(0xe7d9, VECL, VRR_a, V, 0, 0, 0, 0, vec, cmpu64, IF_VEC)
+/* VECTOR COMPARE EQUAL */
+    E(0xe7f8, VCEQ, VRR_b, V, 0, 0, 0, 0, vc, 0, TCG_COND_EQ, IF_VEC)
+/* VECTOR COMPARE HIGH */
+    E(0xe7fb, VCH, VRR_b, V, 0, 0, 0, 0, vc, 0, TCG_COND_GT, IF_VEC)
+/* VECTOR COMPARE HIGH LOGICAL */
+    E(0xe7f9, VCHL, VRR_b, V, 0, 0, 0, 0, vc, 0, TCG_COND_GTU, IF_VEC)
+/* VECTOR COUNT LEADING ZEROS */
+    F(0xe753, VCLZ, VRR_a, V, 0, 0, 0, 0, vclz, 0, IF_VEC)
+/* VECTOR COUNT TRAILING ZEROS */
+    F(0xe752, VCTZ, VRR_a, V, 0, 0, 0, 0, vctz, 0, IF_VEC)
+/* VECTOR EXCLUSIVE OR */
+    F(0xe76d, VX, VRR_c, V, 0, 0, 0, 0, vx, 0, IF_VEC)
+/* VECTOR GALOIS FIELD MULTIPLY SUM */
+    F(0xe7b4, VGFM, VRR_c, V, 0, 0, 0, 0, vgfm, 0, IF_VEC)
+/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
+    F(0xe7bc, VGFMA, VRR_d, V, 0, 0, 0, 0, vgfma, 0, IF_VEC)
+/* VECTOR LOAD COMPLEMENT */
+    F(0xe7de, VLC, VRR_a, V, 0, 0, 0, 0, vlc, 0, IF_VEC)
+ F(0xe7df, VLP, VRR_a, V, 0, 0, 0, 0, vlp, 0, IF_VEC) +/* VECTOR MAXIMUM */ + F(0xe7ff, VMX, VRR_c, V, 0, 0, 0, 0, vmx, 0, IF_VEC) +/* VECTOR MAXIMUM LOGICAL */ + F(0xe7fd, VMXL, VRR_c, V, 0, 0, 0, 0, vmx, 0, IF_VEC) +/* VECTOR MINIMUM */ + F(0xe7fe, VMN, VRR_c, V, 0, 0, 0, 0, vmx, 0, IF_VEC) +/* VECTOR MINIMUM LOGICAL */ + F(0xe7fc, VMNL, VRR_c, V, 0, 0, 0, 0, vmx, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD LOW */ + F(0xe7aa, VMAL, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD HIGH */ + F(0xe7ab, VMAH, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD LOGICAL HIGH */ + F(0xe7a9, VMALH, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD EVEN */ + F(0xe7ae, VMAE, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD LOGICAL EVEN */ + F(0xe7ac, VMALE, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD ODD */ + F(0xe7af, VMAO, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY AND ADD LOGICAL ODD */ + F(0xe7ad, VMALO, VRR_d, V, 0, 0, 0, 0, vma, 0, IF_VEC) +/* VECTOR MULTIPLY HIGH */ + F(0xe7a3, VMH, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY LOGICAL HIGH */ + F(0xe7a1, VMLH, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY LOW */ + F(0xe7a2, VML, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY EVEN */ + F(0xe7a6, VME, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY LOGICAL EVEN */ + F(0xe7a4, VMLE, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY ODD */ + F(0xe7a7, VMO, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY LOGICAL ODD */ + F(0xe7a5, VMLO, VRR_c, V, 0, 0, 0, 0, vm, 0, IF_VEC) +/* VECTOR MULTIPLY SUM LOGICAL */ + F(0xe7b8, VMSL, VRR_d, VE, 0, 0, 0, 0, vmsl, 0, IF_VEC) +/* VECTOR NAND */ + F(0xe76e, VNN, VRR_c, VE, 0, 0, 0, 0, vnn, 0, IF_VEC) +/* VECTOR NOR */ + F(0xe76b, VNO, VRR_c, V, 0, 0, 0, 0, vno, 0, IF_VEC) +/* VECTOR NOT EXCLUSIVE OR */ + F(0xe76c, VNX, VRR_c, VE, 0, 0, 0, 0, vnx, 0, IF_VEC) +/* VECTOR OR */ + F(0xe76a, VO, VRR_c, V, 0, 0, 0, 0, vo, 0, IF_VEC) +/* VECTOR OR WITH COMPLEMENT */ + F(0xe76f, VOC, VRR_c, VE, 0, 0, 0, 0, voc, 0, IF_VEC) +/* VECTOR POPULATION COUNT */ + F(0xe750, VPOPCT, VRR_a, V, 0, 0, 0, 0, vpopct, 0, IF_VEC) +/* VECTOR ELEMENT ROTATE LEFT LOGICAL */ + F(0xe773, VERLLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC) + F(0xe733, VERLL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC) +/* VECTOR ELEMENT ROTATE AND INSERT UNDER MASK */ + F(0xe772, VERIM, VRI_d, V, 0, 0, 0, 0, verim, 0, IF_VEC) +/* VECTOR ELEMENT SHIFT LEFT */ + F(0xe770, VESLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC) + F(0xe730, VESL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC) +/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */ + F(0xe77a, VESRAV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC) + F(0xe73a, VESRA, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC) +/* VECTOR ELEMENT SHIFT RIGHT LOGICAL */ + F(0xe778, VESRLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC) + F(0xe738, VESRL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC) +/* VECTOR SHIFT LEFT */ + F(0xe774, VSL, VRR_c, V, 0, 0, 0, 0, vsl, 0, IF_VEC) +/* VECTOR SHIFT LEFT BY BYTE */ + F(0xe775, VSLB, VRR_c, V, 0, 0, 0, 0, vsl, 0, IF_VEC) +/* VECTOR SHIFT LEFT DOUBLE BY BYTE */ + F(0xe777, VSLDB, VRI_d, V, 0, 0, 0, 0, vsldb, 0, IF_VEC) +/* VECTOR SHIFT RIGHT ARITHMETIC */ + F(0xe77e, VSRA, VRR_c, V, 0, 0, 0, 0, vsra, 0, IF_VEC) +/* VECTOR SHIFT RIGHT ARITHMETIC BY BYTE */ + F(0xe77f, VSRAB, VRR_c, V, 0, 0, 0, 0, vsra, 0, IF_VEC) +/* VECTOR SHIFT RIGHT LOGICAL */ + F(0xe77c, VSRL, VRR_c, V, 0, 0, 0, 0, vsrl, 0, IF_VEC) +/* VECTOR SHIFT RIGHT LOGICAL 
BY BYTE */ + F(0xe77d, VSRLB, VRR_c, V, 0, 0, 0, 0, vsrl, 0, IF_VEC) +/* VECTOR SUBTRACT */ + F(0xe7f7, VS, VRR_c, V, 0, 0, 0, 0, vs, 0, IF_VEC) +/* VECTOR SUBTRACT COMPUTE BORROW INDICATION */ + F(0xe7f5, VSCBI, VRR_c, V, 0, 0, 0, 0, vscbi, 0, IF_VEC) +/* VECTOR SUBTRACT WITH BORROW INDICATION */ + F(0xe7bf, VSBI, VRR_d, V, 0, 0, 0, 0, vsbi, 0, IF_VEC) +/* VECTOR SUBTRACT WITH BORROW COMPUTE BORROW INDICATION */ + F(0xe7bd, VSBCBI, VRR_d, V, 0, 0, 0, 0, vsbcbi, 0, IF_VEC) +/* VECTOR SUM ACROSS DOUBLEWORD */ + F(0xe765, VSUMG, VRR_c, V, 0, 0, 0, 0, vsumg, 0, IF_VEC) +/* VECTOR SUM ACROSS QUADWORD */ + F(0xe767, VSUMQ, VRR_c, V, 0, 0, 0, 0, vsumq, 0, IF_VEC) +/* VECTOR SUM ACROSS WORD */ + F(0xe764, VSUM, VRR_c, V, 0, 0, 0, 0, vsum, 0, IF_VEC) +/* VECTOR TEST UNDER MASK */ + F(0xe7d8, VTM, VRR_a, V, 0, 0, 0, 0, vtm, 0, IF_VEC) + +/* === Vector String Instructions === */ + +/* VECTOR FIND ANY ELEMENT EQUAL */ + F(0xe782, VFAE, VRR_b, V, 0, 0, 0, 0, vfae, 0, IF_VEC) +/* VECTOR FIND ELEMENT EQUAL */ + F(0xe780, VFEE, VRR_b, V, 0, 0, 0, 0, vfee, 0, IF_VEC) +/* VECTOR FIND ELEMENT NOT EQUAL */ + F(0xe781, VFENE, VRR_b, V, 0, 0, 0, 0, vfene, 0, IF_VEC) +/* VECTOR ISOLATE STRING */ + F(0xe75c, VISTR, VRR_a, V, 0, 0, 0, 0, vistr, 0, IF_VEC) +/* VECTOR STRING RANGE COMPARE */ + F(0xe78a, VSTRC, VRR_d, V, 0, 0, 0, 0, vstrc, 0, IF_VEC) + +/* === Vector Floating-Point Instructions === */ + +/* VECTOR FP ADD */ + F(0xe7e3, VFA, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC) +/* VECTOR FP COMPARE SCALAR */ + F(0xe7cb, WFC, VRR_a, V, 0, 0, 0, 0, wfc, 0, IF_VEC) +/* VECTOR FP COMPARE AND SIGNAL SCALAR */ + F(0xe7ca, WFK, VRR_a, V, 0, 0, 0, 0, wfc, 0, IF_VEC) +/* VECTOR FP COMPARE EQUAL */ + F(0xe7e8, VFCE, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC) +/* VECTOR FP COMPARE HIGH */ + F(0xe7eb, VFCH, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC) +/* VECTOR FP COMPARE HIGH OR EQUAL */ + F(0xe7ea, VFCHE, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC) +/* VECTOR FP CONVERT FROM FIXED 64-BIT */ + F(0xe7c3, VCDG, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP CONVERT FROM LOGICAL 64-BIT */ + F(0xe7c1, VCDLG, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP CONVERT TO FIXED 64-BIT */ + F(0xe7c2, VCGD, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP CONVERT TO LOGICAL 64-BIT */ + F(0xe7c0, VCLGD, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP DIVIDE */ + F(0xe7e5, VFD, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC) +/* VECTOR LOAD FP INTEGER */ + F(0xe7c7, VFI, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP LOAD LENGTHENED */ + F(0xe7c4, VFLL, VRR_a, V, 0, 0, 0, 0, vfll, 0, IF_VEC) +/* VECTOR FP LOAD ROUNDED */ + F(0xe7c5, VFLR, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP MAXIMUM */ + F(0xe7ef, VFMAX, VRR_c, VE, 0, 0, 0, 0, vfmax, 0, IF_VEC) +/* VECTOR FP MINIMUM */ + F(0xe7ee, VFMIN, VRR_c, VE, 0, 0, 0, 0, vfmax, 0, IF_VEC) +/* VECTOR FP MULTIPLY */ + F(0xe7e7, VFM, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC) +/* VECTOR FP MULTIPLY AND ADD */ + F(0xe78f, VFMA, VRR_e, V, 0, 0, 0, 0, vfma, 0, IF_VEC) +/* VECTOR FP MULTIPLY AND SUBTRACT */ + F(0xe78e, VFMS, VRR_e, V, 0, 0, 0, 0, vfma, 0, IF_VEC) +/* VECTOR FP NEGATIVE MULTIPLY AND ADD */ + F(0xe79f, VFNMA, VRR_e, VE, 0, 0, 0, 0, vfma, 0, IF_VEC) +/* VECTOR FP NEGATIVE MULTIPLY AND SUBTRACT */ + F(0xe79e, VFNMS, VRR_e, VE, 0, 0, 0, 0, vfma, 0, IF_VEC) +/* VECTOR FP PERFORM SIGN OPERATION */ + F(0xe7cc, VFPSO, VRR_a, V, 0, 0, 0, 0, vfpso, 0, IF_VEC) +/* VECTOR FP SQUARE ROOT */ + F(0xe7ce, VFSQ, VRR_a, V, 0, 0, 0, 0, vfsq, 0, IF_VEC) +/* VECTOR FP SUBTRACT */ + F(0xe7e2, VFS, VRR_c, V, 
0, 0, 0, 0, vfa, 0, IF_VEC) +/* VECTOR FP TEST DATA CLASS IMMEDIATE */ + F(0xe74a, VFTCI, VRI_e, V, 0, 0, 0, 0, vftci, 0, IF_VEC) + +#ifndef CONFIG_USER_ONLY +/* COMPARE AND SWAP AND PURGE */ + E(0xb250, CSP, RRE, Z, r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV) + E(0xb98a, CSPG, RRE, DAT_ENH, r1_o, ra2, r1_P, 0, csp, 0, MO_TEQ, IF_PRIV) +/* DIAGNOSE (KVM hypercall) */ + F(0x8300, DIAG, RSI, Z, 0, 0, 0, 0, diag, 0, IF_PRIV | IF_IO) +/* INSERT STORAGE KEY EXTENDED */ + F(0xb229, ISKE, RRE, Z, 0, r2_o, new, r1_8, iske, 0, IF_PRIV) +/* INVALIDATE DAT TABLE ENTRY */ + F(0xb98e, IDTE, RRF_b, Z, r1_o, r2_o, 0, 0, idte, 0, IF_PRIV) +/* INVALIDATE PAGE TABLE ENTRY */ + F(0xb221, IPTE, RRF_a, Z, r1_o, r2_o, 0, 0, ipte, 0, IF_PRIV) +/* LOAD CONTROL */ + F(0xb700, LCTL, RS_a, Z, 0, a2, 0, 0, lctl, 0, IF_PRIV) + F(0xeb2f, LCTLG, RSY_a, Z, 0, a2, 0, 0, lctlg, 0, IF_PRIV) +/* LOAD PROGRAM PARAMETER */ + F(0xb280, LPP, S, LPP, 0, m2_64, 0, 0, lpp, 0, IF_PRIV) +/* LOAD PSW */ + F(0x8200, LPSW, S, Z, 0, a2, 0, 0, lpsw, 0, IF_PRIV) +/* LOAD PSW EXTENDED */ + F(0xb2b2, LPSWE, S, Z, 0, a2, 0, 0, lpswe, 0, IF_PRIV) +/* LOAD REAL ADDRESS */ + F(0xb100, LRA, RX_a, Z, 0, a2, r1, 0, lra, 0, IF_PRIV) + F(0xe313, LRAY, RXY_a, LD, 0, a2, r1, 0, lra, 0, IF_PRIV) + F(0xe303, LRAG, RXY_a, Z, 0, a2, r1, 0, lra, 0, IF_PRIV) +/* LOAD USING REAL ADDRESS */ + E(0xb24b, LURA, RRE, Z, 0, ra2, new, r1_32, lura, 0, MO_TEUL, IF_PRIV) + E(0xb905, LURAG, RRE, Z, 0, ra2, r1, 0, lura, 0, MO_TEQ, IF_PRIV) +/* MOVE TO PRIMARY */ + F(0xda00, MVCP, SS_d, Z, la1, a2, 0, 0, mvcp, 0, IF_PRIV) +/* MOVE TO SECONDARY */ + F(0xdb00, MVCS, SS_d, Z, la1, a2, 0, 0, mvcs, 0, IF_PRIV) +/* PURGE TLB */ + F(0xb20d, PTLB, S, Z, 0, 0, 0, 0, ptlb, 0, IF_PRIV) +/* RESET REFERENCE BIT EXTENDED */ + F(0xb22a, RRBE, RRE, Z, 0, r2_o, 0, 0, rrbe, 0, IF_PRIV) +/* SERVICE CALL LOGICAL PROCESSOR (PV hypercall) */ + F(0xb220, SERVC, RRE, Z, r1_o, r2_o, 0, 0, servc, 0, IF_PRIV | IF_IO) +/* SET ADDRESS SPACE CONTROL FAST */ + F(0xb279, SACF, S, Z, 0, a2, 0, 0, sacf, 0, IF_PRIV) +/* SET CLOCK */ + F(0xb204, SCK, S, Z, la2, 0, 0, 0, sck, 0, IF_PRIV | IF_IO) +/* SET CLOCK COMPARATOR */ + F(0xb206, SCKC, S, Z, 0, m2_64a, 0, 0, sckc, 0, IF_PRIV | IF_IO) +/* SET CLOCK PROGRAMMABLE FIELD */ + F(0x0107, SCKPF, E, Z, 0, 0, 0, 0, sckpf, 0, IF_PRIV) +/* SET CPU TIMER */ + F(0xb208, SPT, S, Z, 0, m2_64a, 0, 0, spt, 0, IF_PRIV | IF_IO) +/* SET PREFIX */ + F(0xb210, SPX, S, Z, 0, m2_32ua, 0, 0, spx, 0, IF_PRIV) +/* SET PSW KEY FROM ADDRESS */ + F(0xb20a, SPKA, S, Z, 0, a2, 0, 0, spka, 0, IF_PRIV) +/* SET STORAGE KEY EXTENDED */ + F(0xb22b, SSKE, RRF_c, Z, r1_o, r2_o, 0, 0, sske, 0, IF_PRIV) +/* SET SYSTEM MASK */ + F(0x8000, SSM, S, Z, 0, m2_8u, 0, 0, ssm, 0, IF_PRIV) +/* SIGNAL PROCESSOR */ + F(0xae00, SIGP, RS_a, Z, 0, a2, 0, 0, sigp, 0, IF_PRIV | IF_IO) +/* STORE CLOCK COMPARATOR */ + F(0xb207, STCKC, S, Z, la2, 0, new, m1_64a, stckc, 0, IF_PRIV) +/* STORE CONTROL */ + F(0xb600, STCTL, RS_a, Z, 0, a2, 0, 0, stctl, 0, IF_PRIV) + F(0xeb25, STCTG, RSY_a, Z, 0, a2, 0, 0, stctg, 0, IF_PRIV) +/* STORE CPU ADDRESS */ + F(0xb212, STAP, S, Z, la2, 0, new, m1_16a, stap, 0, IF_PRIV) +/* STORE CPU ID */ + F(0xb202, STIDP, S, Z, la2, 0, new, m1_64a, stidp, 0, IF_PRIV) +/* STORE CPU TIMER */ + F(0xb209, STPT, S, Z, la2, 0, new, m1_64a, stpt, 0, IF_PRIV | IF_IO) +/* STORE FACILITY LIST */ + F(0xb2b1, STFL, S, Z, 0, 0, 0, 0, stfl, 0, IF_PRIV) +/* STORE PREFIX */ + F(0xb211, STPX, S, Z, la2, 0, new, m1_32a, stpx, 0, IF_PRIV) +/* STORE SYSTEM INFORMATION */ + F(0xb27d, STSI, S, Z, 0, a2, 0, 
0, stsi, 0, IF_PRIV) +/* STORE THEN AND SYSTEM MASK */ + F(0xac00, STNSM, SI, Z, la1, 0, 0, 0, stnosm, 0, IF_PRIV) +/* STORE THEN OR SYSTEM MASK */ + F(0xad00, STOSM, SI, Z, la1, 0, 0, 0, stnosm, 0, IF_PRIV) +/* STORE USING REAL ADDRESS */ + E(0xb246, STURA, RRE, Z, r1_o, ra2, 0, 0, stura, 0, MO_TEUL, IF_PRIV) + E(0xb925, STURG, RRE, Z, r1_o, ra2, 0, 0, stura, 0, MO_TEQ, IF_PRIV) +/* TEST BLOCK */ + F(0xb22c, TB, RRE, Z, 0, r2_o, 0, 0, testblock, 0, IF_PRIV) +/* TEST PROTECTION */ + C(0xe501, TPROT, SSE, Z, la1, a2, 0, 0, tprot, 0) + +/* CCW I/O Instructions */ + F(0xb276, XSCH, S, Z, 0, 0, 0, 0, xsch, 0, IF_PRIV | IF_IO) + F(0xb230, CSCH, S, Z, 0, 0, 0, 0, csch, 0, IF_PRIV | IF_IO) + F(0xb231, HSCH, S, Z, 0, 0, 0, 0, hsch, 0, IF_PRIV | IF_IO) + F(0xb232, MSCH, S, Z, 0, insn, 0, 0, msch, 0, IF_PRIV | IF_IO) + F(0xb23b, RCHP, S, Z, 0, 0, 0, 0, rchp, 0, IF_PRIV | IF_IO) + F(0xb238, RSCH, S, Z, 0, 0, 0, 0, rsch, 0, IF_PRIV | IF_IO) + F(0xb237, SAL, S, Z, 0, 0, 0, 0, sal, 0, IF_PRIV | IF_IO) + F(0xb23c, SCHM, S, Z, 0, insn, 0, 0, schm, 0, IF_PRIV | IF_IO) + F(0xb274, SIGA, S, Z, 0, 0, 0, 0, siga, 0, IF_PRIV | IF_IO) + F(0xb23a, STCPS, S, Z, 0, 0, 0, 0, stcps, 0, IF_PRIV | IF_IO) + F(0xb233, SSCH, S, Z, 0, insn, 0, 0, ssch, 0, IF_PRIV | IF_IO) + F(0xb239, STCRW, S, Z, 0, insn, 0, 0, stcrw, 0, IF_PRIV | IF_IO) + F(0xb234, STSCH, S, Z, 0, insn, 0, 0, stsch, 0, IF_PRIV | IF_IO) + F(0xb236, TPI , S, Z, la2, 0, 0, 0, tpi, 0, IF_PRIV | IF_IO) + F(0xb235, TSCH, S, Z, 0, insn, 0, 0, tsch, 0, IF_PRIV | IF_IO) + /* ??? Not listed in PoO ninth edition, but there's a linux driver that + uses it: "A CHSC subchannel is usually present on LPAR only." */ + F(0xb25f, CHSC, RRE, Z, 0, insn, 0, 0, chsc, 0, IF_PRIV | IF_IO) + +/* zPCI Instructions */ + /* None of these instructions are documented in the PoP, so this is all + based upon target/s390x/kvm.c and Linux code and likely incomplete */ + F(0xebd0, PCISTB, RSY_a, PCI, la2, 0, 0, 0, pcistb, 0, IF_PRIV | IF_IO) + F(0xebd1, SIC, RSY_a, AIS, r1, r3, 0, 0, sic, 0, IF_PRIV | IF_IO) + F(0xb9a0, CLP, RRF_c, PCI, 0, 0, 0, 0, clp, 0, IF_PRIV | IF_IO) + F(0xb9d0, PCISTG, RRE, PCI, 0, 0, 0, 0, pcistg, 0, IF_PRIV | IF_IO) + F(0xb9d2, PCILG, RRE, PCI, 0, 0, 0, 0, pcilg, 0, IF_PRIV | IF_IO) + F(0xb9d3, RPCIT, RRE, PCI, 0, 0, 0, 0, rpcit, 0, IF_PRIV | IF_IO) + F(0xe3d0, MPCIFC, RXY_a, PCI, la2, 0, 0, 0, mpcifc, 0, IF_PRIV | IF_IO) + F(0xe3d4, STPCIFC, RXY_a, PCI, la2, 0, 0, 0, stpcifc, 0, IF_PRIV | IF_IO) + +#endif /* CONFIG_USER_ONLY */ diff --git a/target/s390x/tcg/insn-format.def b/target/s390x/tcg/insn-format.def new file mode 100644 index 0000000000..6253edbd19 --- /dev/null +++ b/target/s390x/tcg/insn-format.def @@ -0,0 +1,81 @@ +/* Description of s390 insn formats. */ +/* NAME F1, F2... */ +F0(E) +F1(I, I(1, 8, 8)) +F2(RI_a, R(1, 8), I(2,16,16)) +F2(RI_b, R(1, 8), I(2,16,16)) +F2(RI_c, M(1, 8), I(2,16,16)) +F3(RIE_a, R(1, 8), I(2,16,16), M(3,32)) +F4(RIE_b, R(1, 8), R(2,12), M(3,32), I(4,16,16)) +F4(RIE_c, R(1, 8), I(2,32, 8), M(3,12), I(4,16,16)) +F3(RIE_d, R(1, 8), I(2,16,16), R(3,12)) +F3(RIE_e, R(1, 8), I(2,16,16), R(3,12)) +F5(RIE_f, R(1, 8), R(2,12), I(3,16,8), I(4,24,8), I(5,32,8)) +F3(RIE_g, R(1, 8), I(2,16,16), M(3,12)) +F2(RIL_a, R(1, 8), I(2,16,32)) +F2(RIL_b, R(1, 8), I(2,16,32)) +F2(RIL_c, M(1, 8), I(2,16,32)) +F4(RIS, R(1, 8), I(2,32, 8), M(3,12), BD(4,16,20)) +/* ??? The PoO does not call out subtypes _a and _b for RR, as it does + for e.g. RX. Our checking requires this for e.g. BCR. 
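+   For instance (an illustrative note, not from the PoO): LR interprets
+   bits 8-11 as register R1 and therefore decodes as RR_a below, while
+   BCR interprets the same bits as the 4-bit mask M1 and decodes as RR_b.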
*/ +F2(RR_a, R(1, 8), R(2,12)) +F2(RR_b, M(1, 8), R(2,12)) +F2(RRE, R(1,24), R(2,28)) +F3(RRD, R(1,16), R(2,28), R(3,24)) +F4(RRF_a, R(1,24), R(2,28), R(3,16), M(4,20)) +F4(RRF_b, R(1,24), R(2,28), R(3,16), M(4,20)) +F4(RRF_c, R(1,24), R(2,28), M(3,16), M(4,20)) +F4(RRF_d, R(1,24), R(2,28), M(3,16), M(4,20)) +F4(RRF_e, R(1,24), R(2,28), M(3,16), M(4,20)) +F4(RRS, R(1, 8), R(2,12), M(3,32), BD(4,16,20)) +F3(RS_a, R(1, 8), BD(2,16,20), R(3,12)) +F3(RS_b, R(1, 8), BD(2,16,20), M(3,12)) +F3(RSI, R(1, 8), I(2,16,16), R(3,12)) +F2(RSL, L(1, 8, 4), BD(1,16,20)) +F3(RSY_a, R(1, 8), BDL(2), R(3,12)) +F3(RSY_b, R(1, 8), BDL(2), M(3,12)) +F2(RX_a, R(1, 8), BXD(2)) +F2(RX_b, M(1, 8), BXD(2)) +F3(RXE, R(1, 8), BXD(2), M(3,32)) +F3(RXF, R(1,32), BXD(2), R(3, 8)) +F2(RXY_a, R(1, 8), BXDL(2)) +F2(RXY_b, M(1, 8), BXDL(2)) +F1(S, BD(2,16,20)) +F2(SI, BD(1,16,20), I(2,8,8)) +F2(SIL, BD(1,16,20), I(2,32,16)) +F2(SIY, BDL(1), I(2, 8, 8)) +F3(SS_a, L(1, 8, 8), BD(1,16,20), BD(2,32,36)) +F4(SS_b, L(1, 8, 4), BD(1,16,20), L(2,12,4), BD(2,32,36)) +F4(SS_c, L(1, 8, 4), BD(1,16,20), BD(2,32,36), I(3,12, 4)) +/* ??? Odd man out. The L1 field here is really a register, but the + easy way to compress the fields has R1 and B1 overlap. */ +F4(SS_d, L(1, 8, 4), BD(1,16,20), BD(2,32,36), R(3,12)) +F4(SS_e, R(1, 8), BD(2,16,20), R(3,12), BD(4,32,36)) +F3(SS_f, BD(1,16,20), L(2,8,8), BD(2,32,36)) +F2(SSE, BD(1,16,20), BD(2,32,36)) +F3(SSF, BD(1,16,20), BD(2,32,36), R(3,8)) +F3(VRI_a, V(1,8), I(2,16,16), M(3,32)) +F4(VRI_b, V(1,8), I(2,16,8), I(3,24,8), M(4,32)) +F4(VRI_c, V(1,8), V(3,12), I(2,16,16), M(4,32)) +F5(VRI_d, V(1,8), V(2,12), V(3,16), I(4,24,8), M(5,32)) +F5(VRI_e, V(1,8), V(2,12), I(3,16,12), M(5,28), M(4,32)) +F5(VRI_f, V(1,8), V(2,12), V(3,16), M(5,24), I(4,28,8)) +F5(VRI_g, V(1,8), V(2,12), I(4,16,8), M(5,24), I(3,28,8)) +F3(VRI_h, V(1,8), I(2,16,16), I(3,32,4)) +F4(VRI_i, V(1,8), R(2,12), M(4,24), I(3,28,8)) +F5(VRR_a, V(1,8), V(2,12), M(5,24), M(4,28), M(3,32)) +F5(VRR_b, V(1,8), V(2,12), V(3,16), M(5,24), M(4,32)) +F6(VRR_c, V(1,8), V(2,12), V(3,16), M(6,24), M(5,28), M(4,32)) +F6(VRR_d, V(1,8), V(2,12), V(3,16), M(5,20), M(6,24), V(4,32)) +F6(VRR_e, V(1,8), V(2,12), V(3,16), M(6,20), M(5,28), V(4,32)) +F3(VRR_f, V(1,8), R(2,12), R(3,16)) +F1(VRR_g, V(1,12)) +F3(VRR_h, V(1,12), V(2,16), M(3,24)) +F3(VRR_i, R(1,8), V(2,12), M(3,24)) +F4(VRS_a, V(1,8), V(3,12), BD(2,16,20), M(4,32)) +F4(VRS_b, V(1,8), R(3,12), BD(2,16,20), M(4,32)) +F4(VRS_c, R(1,8), V(3,12), BD(2,16,20), M(4,32)) +F3(VRS_d, R(3,12), BD(2,16,20), V(1,32)) +F4(VRV, V(1,8), V(2,12), BD(2,16,20), M(3,32)) +F3(VRX, V(1,8), BXD(2), M(3,32)) +F3(VSI, I(3,8,8), BD(2,16,20), V(1,32)) diff --git a/target/s390x/tcg/int_helper.c b/target/s390x/tcg/int_helper.c new file mode 100644 index 0000000000..954542388a --- /dev/null +++ b/target/s390x/tcg/int_helper.c @@ -0,0 +1,148 @@ +/* + * S/390 integer helper routines + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2009 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "tcg_s390x.h" +#include "exec/exec-all.h" +#include "qemu/host-utils.h" +#include "exec/helper-proto.h" + +/* #define DEBUG_HELPER */ +#ifdef DEBUG_HELPER +#define HELPER_LOG(x...) qemu_log(x) +#else +#define HELPER_LOG(x...) +#endif + +/* 64/32 -> 32 signed division */ +int64_t HELPER(divs32)(CPUS390XState *env, int64_t a, int64_t b64) +{ + int32_t ret, b = b64; + int64_t q; + + if (b == 0) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + + ret = q = a / b; + env->retxl = a % b; + + /* Catch non-representable quotient. */ + if (ret != q) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + + return ret; +} + +/* 64/32 -> 32 unsigned division */ +uint64_t HELPER(divu32)(CPUS390XState *env, uint64_t a, uint64_t b64) +{ + uint32_t ret, b = b64; + uint64_t q; + + if (b == 0) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + + ret = q = a / b; + env->retxl = a % b; + + /* Catch non-representable quotient. */ + if (ret != q) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + + return ret; +} + +/* 64/64 -> 64 signed division */ +int64_t HELPER(divs64)(CPUS390XState *env, int64_t a, int64_t b) +{ + /* Catch divide by zero, and non-representable quotient (MIN / -1). */ + if (b == 0 || (b == -1 && a == (1ll << 63))) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + env->retxl = a % b; + return a / b; +} + +/* 128 -> 64/64 unsigned division */ +uint64_t HELPER(divu64)(CPUS390XState *env, uint64_t ah, uint64_t al, + uint64_t b) +{ + uint64_t ret; + /* Signal divide by zero. */ + if (b == 0) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } + if (ah == 0) { + /* 64 -> 64/64 case */ + env->retxl = al % b; + ret = al / b; + } else { + /* ??? Move i386 idivq helper to host-utils. */ +#ifdef CONFIG_INT128 + __uint128_t a = ((__uint128_t)ah << 64) | al; + __uint128_t q = a / b; + env->retxl = a % b; + ret = q; + if (ret != q) { + tcg_s390_program_interrupt(env, PGM_FIXPT_DIVIDE, GETPC()); + } +#else + /* 32-bit hosts would need special wrapper functionality - just abort if + we encounter such a case; it's very unlikely anyways. */ + cpu_abort(env_cpu(env), "128 -> 64/64 division not implemented\n"); +#endif + } + return ret; +} + +uint64_t HELPER(cvd)(int32_t reg) +{ + /* positive 0 */ + uint64_t dec = 0x0c; + int64_t bin = reg; + int shift; + + if (bin < 0) { + bin = -bin; + dec = 0x0d; + } + + for (shift = 4; (shift < 64) && bin; shift += 4) { + dec |= (bin % 10) << shift; + bin /= 10; + } + + return dec; +} + +uint64_t HELPER(popcnt)(uint64_t val) +{ + /* Note that we don't fold past bytes. 
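+   As an illustrative walk-through (ours, not part of the original
+   comment), take the byte 0xb1 = 10110001:
+     pair fold:   10 11 00 01 -> 01 10 00 01 (0x61)
+     nibble fold: 0110 0001 -> 0011 0001 (0x31)
+     byte fold:   0x31 + 0x03 = 0x34, masked to 0x04, i.e. 4 bits set.
+   Each result byte thus holds the population count of the corresponding
+   input byte, which is exactly what POPCNT is defined to return.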
*/ + val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL); + val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL); + val = (val + (val >> 4)) & 0x0f0f0f0f0f0f0f0fULL; + return val; +} diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c new file mode 100644 index 0000000000..9bae13ecf0 --- /dev/null +++ b/target/s390x/tcg/mem_helper.c @@ -0,0 +1,3008 @@ +/* + * S/390 memory access helper routines + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2009 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "tcg_s390x.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "qemu/int128.h" +#include "qemu/atomic128.h" +#include "tcg/tcg.h" + +#if !defined(CONFIG_USER_ONLY) +#include "hw/s390x/storage-keys.h" +#include "hw/boards.h" +#endif + +/*****************************************************************************/ +/* Softmmu support */ + +/* #define DEBUG_HELPER */ +#ifdef DEBUG_HELPER +#define HELPER_LOG(x...) qemu_log(x) +#else +#define HELPER_LOG(x...) +#endif + +static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key) +{ + uint16_t pkm = env->cregs[3] >> 16; + + if (env->psw.mask & PSW_MASK_PSTATE) { + /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */ + return pkm & (0x80 >> psw_key); + } + return true; +} + +static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest, + uint64_t src, uint32_t len) +{ + if (!len || src == dest) { + return false; + } + /* Take care of wrapping at the end of address space. */ + if (unlikely(wrap_address(env, src + len - 1) < src)) { + return dest > src || dest <= wrap_address(env, src + len - 1); + } + return dest > src && dest <= src + len - 1; +} + +/* Trigger a SPECIFICATION exception if an address or a length is not + naturally aligned. */ +static inline void check_alignment(CPUS390XState *env, uint64_t v, + int wordsize, uintptr_t ra) +{ + if (v % wordsize) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } +} + +/* Load a value from memory according to its size. */ +static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr, + int wordsize, uintptr_t ra) +{ + switch (wordsize) { + case 1: + return cpu_ldub_data_ra(env, addr, ra); + case 2: + return cpu_lduw_data_ra(env, addr, ra); + default: + abort(); + } +} + +/* Store a value to memory according to its size. */ +static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr, + uint64_t value, int wordsize, + uintptr_t ra) +{ + switch (wordsize) { + case 1: + cpu_stb_data_ra(env, addr, value, ra); + break; + case 2: + cpu_stw_data_ra(env, addr, value, ra); + break; + default: + abort(); + } +} + +/* An access covers at most 4096 bytes and therefore at most two pages. 
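+   Sketch (assuming 4 KiB pages): a 300-byte access starting at 0x1ff0
+   is split into size1 = MIN(300, -(0x1ff0 | TARGET_PAGE_MASK)) = 16
+   bytes on the first page and size2 = 300 - 16 = 284 bytes on the
+   second, as computed by access_prepare_nf() below.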
*/ +typedef struct S390Access { + target_ulong vaddr1; + target_ulong vaddr2; + char *haddr1; + char *haddr2; + uint16_t size1; + uint16_t size2; + /* + * If we can't access the host page directly, we'll have to do I/O access + * via ld/st helpers. These are internal details, so we store the + * mmu idx to do the access here instead of passing it around in the + * helpers. Maybe, one day we can get rid of ld/st access - once we can + * handle TLB_NOTDIRTY differently. We don't expect these special accesses + * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP + * pages, we might trigger a new MMU translation - very unlikely that + * the mapping changes in between and we would trigger a fault. + */ + int mmu_idx; +} S390Access; + +/* + * With nonfault=1, return the PGM_ exception that would have been injected + * into the guest; return 0 if no exception was detected. + * + * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec. + * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr. + */ +static int s390_probe_access(CPUArchState *env, target_ulong addr, int size, + MMUAccessType access_type, int mmu_idx, + bool nonfault, void **phost, uintptr_t ra) +{ + int flags; + +#if defined(CONFIG_USER_ONLY) + flags = page_get_flags(addr); + if (!(flags & (access_type == MMU_DATA_LOAD ? PAGE_READ : PAGE_WRITE_ORG))) { + env->__excp_addr = addr; + flags = (flags & PAGE_VALID) ? PGM_PROTECTION : PGM_ADDRESSING; + if (nonfault) { + return flags; + } + tcg_s390_program_interrupt(env, flags, ra); + } + *phost = g2h(env_cpu(env), addr); +#else + /* + * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL + * to detect if there was an exception during tlb_fill(). + */ + env->tlb_fill_exc = 0; + flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost, + ra); + if (env->tlb_fill_exc) { + return env->tlb_fill_exc; + } + + if (unlikely(flags & TLB_WATCHPOINT)) { + /* S390 does not presently use transaction attributes. */ + cpu_check_watchpoint(env_cpu(env), addr, size, + MEMTXATTRS_UNSPECIFIED, + (access_type == MMU_DATA_STORE + ? BP_MEM_WRITE : BP_MEM_READ), ra); + } +#endif + return 0; +} + +static int access_prepare_nf(S390Access *access, CPUS390XState *env, + bool nonfault, vaddr vaddr1, int size, + MMUAccessType access_type, + int mmu_idx, uintptr_t ra) +{ + void *haddr1, *haddr2 = NULL; + int size1, size2, exc; + vaddr vaddr2 = 0; + + assert(size > 0 && size <= 4096); + + size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)); + size2 = size - size1; + + exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault, + &haddr1, ra); + if (exc) { + return exc; + } + if (unlikely(size2)) { + /* The access crosses page boundaries. */ + vaddr2 = wrap_address(env, vaddr1 + size1); + exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx, + nonfault, &haddr2, ra); + if (exc) { + return exc; + } + } + + *access = (S390Access) { + .vaddr1 = vaddr1, + .vaddr2 = vaddr2, + .haddr1 = haddr1, + .haddr2 = haddr2, + .size1 = size1, + .size2 = size2, + .mmu_idx = mmu_idx + }; + return 0; +} + +static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size, + MMUAccessType access_type, int mmu_idx, + uintptr_t ra) +{ + S390Access ret; + int exc = access_prepare_nf(&ret, env, false, vaddr, size, + access_type, mmu_idx, ra); + assert(!exc); + return ret; +} + +/* Helper to handle memset on a single page. 
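+   (access_memset() below calls this once for each of the at most two
+   pages described by a prepared S390Access.)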
*/ +static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr, + uint8_t byte, uint16_t size, int mmu_idx, + uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + g_assert(haddr); + memset(haddr, byte, size); +#else + TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + int i; + + if (likely(haddr)) { + memset(haddr, byte, size); + } else { + /* + * Do a single access and test if we can then get access to the + * page. This is especially relevant to speed up TLB_NOTDIRTY. + */ + g_assert(size > 0); + helper_ret_stb_mmu(env, vaddr, byte, oi, ra); + haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); + if (likely(haddr)) { + memset(haddr + 1, byte, size - 1); + } else { + for (i = 1; i < size; i++) { + helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra); + } + } + } +#endif +} + +static void access_memset(CPUS390XState *env, S390Access *desta, + uint8_t byte, uintptr_t ra) +{ + + do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1, + desta->mmu_idx, ra); + if (likely(!desta->size2)) { + return; + } + do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2, + desta->mmu_idx, ra); +} + +static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr, + int offset, int mmu_idx, uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + return ldub_p(*haddr + offset); +#else + TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + uint8_t byte; + + if (likely(*haddr)) { + return ldub_p(*haddr + offset); + } + /* + * Do a single access and test if we can then get access to the + * page. This is especially relevant to speed up TLB_NOTDIRTY. + */ + byte = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra); + *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx); + return byte; +#endif +} + +static uint8_t access_get_byte(CPUS390XState *env, S390Access *access, + int offset, uintptr_t ra) +{ + if (offset < access->size1) { + return do_access_get_byte(env, access->vaddr1, &access->haddr1, + offset, access->mmu_idx, ra); + } + return do_access_get_byte(env, access->vaddr2, &access->haddr2, + offset - access->size1, access->mmu_idx, ra); +} + +static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr, + int offset, uint8_t byte, int mmu_idx, + uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + stb_p(*haddr + offset, byte); +#else + TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + + if (likely(*haddr)) { + stb_p(*haddr + offset, byte); + return; + } + /* + * Do a single access and test if we can then get access to the + * page. This is especially relevant to speed up TLB_NOTDIRTY. + */ + helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra); + *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); +#endif +} + +static void access_set_byte(CPUS390XState *env, S390Access *access, + int offset, uint8_t byte, uintptr_t ra) +{ + if (offset < access->size1) { + do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte, + access->mmu_idx, ra); + } else { + do_access_set_byte(env, access->vaddr2, &access->haddr2, + offset - access->size1, byte, access->mmu_idx, ra); + } +} + +/* + * Move data with the same semantics as memmove() in case ranges don't overlap + * or src > dest. Undefined behavior on destructive overlaps. 
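+ * For example, dest == src + 1 with len > 1 is destructive: MVC's
+ * byte-by-byte definition propagates the first byte through the whole
+ * buffer, which a plain memmove() would not do; do_helper_mvc() below
+ * therefore special-cases such overlaps via access_memset() or a byte
+ * loop.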
+ */ +static void access_memmove(CPUS390XState *env, S390Access *desta, + S390Access *srca, uintptr_t ra) +{ + int diff; + + g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2); + + /* Fallback to slow access in case we don't have access to all host pages */ + if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) || + !srca->haddr1 || (srca->size2 && !srca->haddr2))) { + int i; + + for (i = 0; i < desta->size1 + desta->size2; i++) { + uint8_t byte = access_get_byte(env, srca, i, ra); + + access_set_byte(env, desta, i, byte, ra); + } + return; + } + + if (srca->size1 == desta->size1) { + memmove(desta->haddr1, srca->haddr1, srca->size1); + if (unlikely(srca->size2)) { + memmove(desta->haddr2, srca->haddr2, srca->size2); + } + } else if (srca->size1 < desta->size1) { + diff = desta->size1 - srca->size1; + memmove(desta->haddr1, srca->haddr1, srca->size1); + memmove(desta->haddr1 + srca->size1, srca->haddr2, diff); + if (likely(desta->size2)) { + memmove(desta->haddr2, srca->haddr2 + diff, desta->size2); + } + } else { + diff = srca->size1 - desta->size1; + memmove(desta->haddr1, srca->haddr1, desta->size1); + memmove(desta->haddr2, srca->haddr1 + desta->size1, diff); + if (likely(srca->size2)) { + memmove(desta->haddr2 + diff, srca->haddr2, srca->size2); + } + } +} + +static int mmu_idx_from_as(uint8_t as) +{ + switch (as) { + case AS_PRIMARY: + return MMU_PRIMARY_IDX; + case AS_SECONDARY: + return MMU_SECONDARY_IDX; + case AS_HOME: + return MMU_HOME_IDX; + default: + /* FIXME AS_ACCREG */ + g_assert_not_reached(); + } +} + +/* and on array */ +static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest, + uint64_t src, uintptr_t ra) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; + uint32_t i; + uint8_t c = 0; + + HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", + __func__, l, dest, src); + + /* NC always processes one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = access_get_byte(env, &srca1, i, ra) & + access_get_byte(env, &srca2, i, ra); + + c |= x; + access_set_byte(env, &desta, i, x, ra); + } + return c != 0; +} + +uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest, + uint64_t src) +{ + return do_helper_nc(env, l, dest, src, GETPC()); +} + +/* xor on array */ +static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest, + uint64_t src, uintptr_t ra) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; + uint32_t i; + uint8_t c = 0; + + HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", + __func__, l, dest, src); + + /* XC always processes one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + + /* xor with itself is the same as memset(0) */ + if (src == dest) { + access_memset(env, &desta, 0, ra); + return 0; + } + + for (i = 0; i < l; i++) { + const uint8_t x = access_get_byte(env, &srca1, i, ra) ^ + access_get_byte(env, &srca2, i, ra); + + c |= x; + access_set_byte(env, &desta, i, x, ra); + } + return c != 0; +} + +uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, 
uint64_t dest, + uint64_t src) +{ + return do_helper_xc(env, l, dest, src, GETPC()); +} + +/* or on array */ +static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest, + uint64_t src, uintptr_t ra) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; + uint32_t i; + uint8_t c = 0; + + HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", + __func__, l, dest, src); + + /* OC always processes one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = access_get_byte(env, &srca1, i, ra) | + access_get_byte(env, &srca2, i, ra); + + c |= x; + access_set_byte(env, &desta, i, x, ra); + } + return c != 0; +} + +uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest, + uint64_t src) +{ + return do_helper_oc(env, l, dest, src, GETPC()); +} + +/* memmove */ +static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest, + uint64_t src, uintptr_t ra) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca, desta; + uint32_t i; + + HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n", + __func__, l, dest, src); + + /* MVC always copies one more byte than specified - maximum is 256 */ + l++; + + srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + + /* + * "When the operands overlap, the result is obtained as if the operands + * were processed one byte at a time". Only non-destructive overlaps + * behave like memmove(). + */ + if (dest == src + 1) { + access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra); + } else if (!is_destructive_overlap(env, dest, src, l)) { + access_memmove(env, &desta, &srca, ra); + } else { + for (i = 0; i < l; i++) { + uint8_t byte = access_get_byte(env, &srca, i, ra); + + access_set_byte(env, &desta, i, byte, ra); + } + } + + return env->cc_op; +} + +void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) +{ + do_helper_mvc(env, l, dest, src, GETPC()); +} + +/* move inverse */ +void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca, desta; + uintptr_t ra = GETPC(); + int i; + + /* MVCIN always copies one more byte than specified - maximum is 256 */ + l++; + + src = wrap_address(env, src - l + 1); + srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra); + + access_set_byte(env, &desta, i, x, ra); + } +} + +/* move numerics */ +void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; + uintptr_t ra = GETPC(); + int i; + + /* MVN always copies one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) | + (access_get_byte(env, &srca2, i, ra) & 0xf0); + + 
access_set_byte(env, &desta, i, x, ra); + } +} + +/* move with offset */ +void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) +{ + const int mmu_idx = cpu_mmu_index(env, false); + /* MVO always processes one more byte than specified - maximum is 16 */ + const int len_dest = (l >> 4) + 1; + const int len_src = (l & 0xf) + 1; + uintptr_t ra = GETPC(); + uint8_t byte_dest, byte_src; + S390Access srca, desta; + int i, j; + + srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra); + + /* Handle rightmost byte */ + byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra); + byte_src = access_get_byte(env, &srca, len_src - 1, ra); + byte_dest = (byte_dest & 0x0f) | (byte_src << 4); + access_set_byte(env, &desta, len_dest - 1, byte_dest, ra); + + /* Process remaining bytes from right to left */ + for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) { + byte_dest = byte_src >> 4; + if (j >= 0) { + byte_src = access_get_byte(env, &srca, j, ra); + } else { + byte_src = 0; + } + byte_dest |= byte_src << 4; + access_set_byte(env, &desta, i, byte_dest, ra); + } +} + +/* move zones */ +void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src) +{ + const int mmu_idx = cpu_mmu_index(env, false); + S390Access srca1, srca2, desta; + uintptr_t ra = GETPC(); + int i; + + /* MVZ always copies one more byte than specified - maximum is 256 */ + l++; + + srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra); + srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < l; i++) { + const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) | + (access_get_byte(env, &srca2, i, ra) & 0x0f); + + access_set_byte(env, &desta, i, x, ra); + } +} + +/* compare unsigned byte arrays */ +static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1, + uint64_t s2, uintptr_t ra) +{ + uint32_t i; + uint32_t cc = 0; + + HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n", + __func__, l, s1, s2); + + for (i = 0; i <= l; i++) { + uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra); + uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra); + HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y); + if (x < y) { + cc = 1; + break; + } else if (x > y) { + cc = 2; + break; + } + } + + HELPER_LOG("\n"); + return cc; +} + +uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2) +{ + return do_helper_clc(env, l, s1, s2, GETPC()); +} + +/* compare logical under mask */ +uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask, + uint64_t addr) +{ + uintptr_t ra = GETPC(); + uint32_t cc = 0; + + HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1, + mask, addr); + + while (mask) { + if (mask & 8) { + uint8_t d = cpu_ldub_data_ra(env, addr, ra); + uint8_t r = extract32(r1, 24, 8); + HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d, + addr); + if (r < d) { + cc = 1; + break; + } else if (r > d) { + cc = 2; + break; + } + addr++; + } + mask = (mask << 1) & 0xf; + r1 <<= 8; + } + + HELPER_LOG("\n"); + return cc; +} + +static inline uint64_t get_address(CPUS390XState *env, int reg) +{ + return wrap_address(env, env->regs[reg]); +} + +/* + * Store the address to the given register, zeroing out unused leftmost + * bits in bit positions 32-63 (24-bit and 31-bit mode only). 
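+ * Sketch: in 24-bit mode, storing 0x12345678 leaves 0x00345678 in bit
+ * positions 32-63 of the register and keeps bit positions 0-31
+ * untouched.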
+ */ +static inline void set_address_zero(CPUS390XState *env, int reg, + uint64_t address) +{ + if (env->psw.mask & PSW_MASK_64) { + env->regs[reg] = address; + } else { + if (!(env->psw.mask & PSW_MASK_32)) { + address &= 0x00ffffff; + } else { + address &= 0x7fffffff; + } + env->regs[reg] = deposit64(env->regs[reg], 0, 32, address); + } +} + +static inline void set_address(CPUS390XState *env, int reg, uint64_t address) +{ + if (env->psw.mask & PSW_MASK_64) { + /* 64-Bit mode */ + env->regs[reg] = address; + } else { + if (!(env->psw.mask & PSW_MASK_32)) { + /* 24-Bit mode. According to the PoO it is implementation + dependent if bits 32-39 remain unchanged or are set to + zeros. Choose the former so that the function can also be + used for TRT. */ + env->regs[reg] = deposit64(env->regs[reg], 0, 24, address); + } else { + /* 31-Bit mode. According to the PoO it is implementation + dependent if bit 32 remains unchanged or is set to zero. + Choose the latter so that the function can also be used for + TRT. */ + address &= 0x7fffffff; + env->regs[reg] = deposit64(env->regs[reg], 0, 32, address); + } + } +} + +static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length) +{ + if (!(env->psw.mask & PSW_MASK_64)) { + return (uint32_t)length; + } + return length; +} + +static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length) +{ + if (!(env->psw.mask & PSW_MASK_64)) { + /* 24-Bit and 31-Bit mode */ + length &= 0x7fffffff; + } + return length; +} + +static inline uint64_t get_length(CPUS390XState *env, int reg) +{ + return wrap_length31(env, env->regs[reg]); +} + +static inline void set_length(CPUS390XState *env, int reg, uint64_t length) +{ + if (env->psw.mask & PSW_MASK_64) { + /* 64-Bit mode */ + env->regs[reg] = length; + } else { + /* 24-Bit and 31-Bit mode */ + env->regs[reg] = deposit64(env->regs[reg], 0, 32, length); + } +} + +/* search string (c is byte to search, r2 is string, r1 end of string) */ +void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + uintptr_t ra = GETPC(); + uint64_t end, str; + uint32_t len; + uint8_t v, c = env->regs[0]; + + /* Bits 32-55 must contain all 0. */ + if (env->regs[0] & 0xffffff00u) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + str = get_address(env, r2); + end = get_address(env, r1); + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. For now, let's cap at 8k. */ + for (len = 0; len < 0x2000; ++len) { + if (str + len == end) { + /* Character not found. R1 & R2 are unmodified. */ + env->cc_op = 2; + return; + } + v = cpu_ldub_data_ra(env, str + len, ra); + if (v == c) { + /* Character found. Set R1 to the location; R2 is unmodified. */ + env->cc_op = 1; + set_address(env, r1, str + len); + return; + } + } + + /* CPU-determined bytes processed. Advance R2 to next byte to process. */ + env->cc_op = 3; + set_address(env, r2, str + len); +} + +void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + uintptr_t ra = GETPC(); + uint32_t len; + uint16_t v, c = env->regs[0]; + uint64_t end, str, adj_end; + + /* Bits 32-47 of R0 must be zero. */ + if (env->regs[0] & 0xffff0000u) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + str = get_address(env, r2); + end = get_address(env, r1); + + /* If the LSB of the two addresses differ, use one extra byte. */ + adj_end = end + ((str ^ end) & 1); + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. 
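+ Returning cc 3 makes the program re-execute the instruction, so any
+ cap on the per-invocation work is architecturally fine.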
For now, let's cap at 8k. */ + for (len = 0; len < 0x2000; len += 2) { + if (str + len == adj_end) { + /* End of input found. */ + env->cc_op = 2; + return; + } + v = cpu_lduw_data_ra(env, str + len, ra); + if (v == c) { + /* Character found. Set R1 to the location; R2 is unmodified. */ + env->cc_op = 1; + set_address(env, r1, str + len); + return; + } + } + + /* CPU-determined bytes processed. Advance R2 to next byte to process. */ + env->cc_op = 3; + set_address(env, r2, str + len); +} + +/* unsigned string compare (c is string terminator) */ +uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2) +{ + uintptr_t ra = GETPC(); + uint32_t len; + + c = c & 0xff; + s1 = wrap_address(env, s1); + s2 = wrap_address(env, s2); + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. For now, let's cap at 8k. */ + for (len = 0; len < 0x2000; ++len) { + uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra); + uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra); + if (v1 == v2) { + if (v1 == c) { + /* Equal. CC=0, and don't advance the registers. */ + env->cc_op = 0; + env->retxl = s2; + return s1; + } + } else { + /* Unequal. CC={1,2}, and advance the registers. Note that + the terminator need not be zero, but the string that contains + the terminator is by definition "low". */ + env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2); + env->retxl = s2 + len; + return s1 + len; + } + } + + /* CPU-determined bytes equal; advance the registers. */ + env->cc_op = 3; + env->retxl = s2 + len; + return s1 + len; +} + +/* move page */ +uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2) +{ + const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK; + const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK; + const int mmu_idx = cpu_mmu_index(env, false); + const bool f = extract64(r0, 11, 1); + const bool s = extract64(r0, 10, 1); + const bool cco = extract64(r0, 8, 1); + uintptr_t ra = GETPC(); + S390Access srca, desta; + int exc; + + if ((f && s) || extract64(r0, 12, 4)) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC()); + } + + /* + * We always manually handle exceptions such that we can properly store + * r1/r2 to the lowcore on page-translation exceptions. 
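+ * With the condition-code option set, an inaccessible operand instead
+ * completes with cc 2 (source) or cc 1 (destination), as coded below,
+ * so the caller can resolve the fault itself and retry.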
+ * + * TODO: Access key handling + */ + exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE, + MMU_DATA_LOAD, mmu_idx, ra); + if (exc) { + if (cco) { + return 2; + } + goto inject_exc; + } + exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE, + MMU_DATA_STORE, mmu_idx, ra); + if (exc) { + if (cco && exc != PGM_PROTECTION) { + return 1; + } + goto inject_exc; + } + access_memmove(env, &desta, &srca, ra); + return 0; /* data moved */ +inject_exc: +#if !defined(CONFIG_USER_ONLY) + if (exc != PGM_ADDRESSING) { + stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code), + env->tlb_fill_tec); + } + if (exc == PGM_PAGE_TRANS) { + stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id), + r1 << 4 | r2); + } +#endif + tcg_s390_program_interrupt(env, exc, ra); +} + +/* string copy */ +uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + const int mmu_idx = cpu_mmu_index(env, false); + const uint64_t d = get_address(env, r1); + const uint64_t s = get_address(env, r2); + const uint8_t c = env->regs[0]; + const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK)); + S390Access srca, desta; + uintptr_t ra = GETPC(); + int i; + + if (env->regs[0] & 0xffffff00ull) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + /* + * Our access should not exceed single pages, as we must not report access + * exceptions exceeding the actually copied range (which we don't know at + * this point). We might over-indicate watchpoints within the pages + * (if we ever care, we have to limit processing to a single byte). + */ + srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra); + for (i = 0; i < len; i++) { + const uint8_t v = access_get_byte(env, &srca, i, ra); + + access_set_byte(env, &desta, i, v, ra); + if (v == c) { + set_address_zero(env, r1, d + i); + return 1; + } + } + set_address_zero(env, r1, d + len); + set_address_zero(env, r2, s + len); + return 3; +} + +/* load access registers r1 to r3 from memory at a2 */ +void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) +{ + uintptr_t ra = GETPC(); + int i; + + if (a2 & 0x3) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + for (i = r1;; i = (i + 1) % 16) { + env->aregs[i] = cpu_ldl_data_ra(env, a2, ra); + a2 += 4; + + if (i == r3) { + break; + } + } +} + +/* store access registers r1 to r3 in memory at a2 */ +void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) +{ + uintptr_t ra = GETPC(); + int i; + + if (a2 & 0x3) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + for (i = r1;; i = (i + 1) % 16) { + cpu_stl_data_ra(env, a2, env->aregs[i], ra); + a2 += 4; + + if (i == r3) { + break; + } + } +} + +/* move long helper */ +static inline uint32_t do_mvcl(CPUS390XState *env, + uint64_t *dest, uint64_t *destlen, + uint64_t *src, uint64_t *srclen, + uint16_t pad, int wordsize, uintptr_t ra) +{ + const int mmu_idx = cpu_mmu_index(env, false); + int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK)); + S390Access srca, desta; + int i, cc; + + if (*destlen == *srclen) { + cc = 0; + } else if (*destlen < *srclen) { + cc = 1; + } else { + cc = 2; + } + + if (!*destlen) { + return cc; + } + + /* + * Only perform one type of operation (move/pad) at a time. + * Stay within single pages. 
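+ * Illustrative padding example: for MVCLU with pad 0xaabb, the parity
+ * of the remaining destination length selects the byte stored below,
+ * yielding ... 0xaa 0xbb 0xaa 0xbb so the destination ends with the
+ * complete two-byte pad character.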
+ */ + if (*srclen) { + /* Copy the src array */ + len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len); + *destlen -= len; + *srclen -= len; + srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra); + desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra); + access_memmove(env, &desta, &srca, ra); + *src = wrap_address(env, *src + len); + *dest = wrap_address(env, *dest + len); + } else if (wordsize == 1) { + /* Pad the remaining area */ + *destlen -= len; + desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra); + access_memset(env, &desta, pad, ra); + *dest = wrap_address(env, *dest + len); + } else { + desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra); + + /* The remaining length selects the padding byte. */ + for (i = 0; i < len; (*destlen)--, i++) { + if (*destlen & 1) { + access_set_byte(env, &desta, i, pad, ra); + } else { + access_set_byte(env, &desta, i, pad >> 8, ra); + } + } + *dest = wrap_address(env, *dest + len); + } + + return *destlen ? 3 : cc; +} + +/* move long */ +uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + const int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + uint64_t destlen = env->regs[r1 + 1] & 0xffffff; + uint64_t dest = get_address(env, r1); + uint64_t srclen = env->regs[r2 + 1] & 0xffffff; + uint64_t src = get_address(env, r2); + uint8_t pad = env->regs[r2 + 1] >> 24; + CPUState *cs = env_cpu(env); + S390Access srca, desta; + uint32_t cc, cur_len; + + if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) { + cc = 3; + } else if (srclen == destlen) { + cc = 0; + } else if (destlen < srclen) { + cc = 1; + } else { + cc = 2; + } + + /* We might have to zero-out some bits even if there was no action. */ + if (unlikely(!destlen || cc == 3)) { + set_address_zero(env, r2, src); + set_address_zero(env, r1, dest); + return cc; + } else if (!srclen) { + set_address_zero(env, r2, src); + } + + /* + * Only perform one type of operation (move/pad) in one step. + * Stay within single pages. + */ + while (destlen) { + cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK)); + if (!srclen) { + desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx, + ra); + access_memset(env, &desta, pad, ra); + } else { + cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len); + + srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx, + ra); + desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx, + ra); + access_memmove(env, &desta, &srca, ra); + src = wrap_address(env, src + cur_len); + srclen -= cur_len; + env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen); + set_address_zero(env, r2, src); + } + dest = wrap_address(env, dest + cur_len); + destlen -= cur_len; + env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen); + set_address_zero(env, r1, dest); + + /* + * MVCL is interruptible. Return to the main loop if requested after + * writing back all state to registers. If no interrupt will get + * injected, we'll end up back in this handler and continue processing + * the remaining parts. 
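+ * (Sketch of the restart: the lengths and addresses were already
+ * written back to r1/r1+1 and r2/r2+1 above, so re-executing MVCL
+ * after cpu_loop_exit_restore() resumes exactly where we left off.)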
+ */ + if (destlen && unlikely(cpu_loop_exit_requested(cs))) { + cpu_loop_exit_restore(cs, ra); + } + } + return cc; +} + +/* move long extended */ +uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2, + uint32_t r3) +{ + uintptr_t ra = GETPC(); + uint64_t destlen = get_length(env, r1 + 1); + uint64_t dest = get_address(env, r1); + uint64_t srclen = get_length(env, r3 + 1); + uint64_t src = get_address(env, r3); + uint8_t pad = a2; + uint32_t cc; + + cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra); + + set_length(env, r1 + 1, destlen); + set_length(env, r3 + 1, srclen); + set_address(env, r1, dest); + set_address(env, r3, src); + + return cc; +} + +/* move long unicode */ +uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2, + uint32_t r3) +{ + uintptr_t ra = GETPC(); + uint64_t destlen = get_length(env, r1 + 1); + uint64_t dest = get_address(env, r1); + uint64_t srclen = get_length(env, r3 + 1); + uint64_t src = get_address(env, r3); + uint16_t pad = a2; + uint32_t cc; + + cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra); + + set_length(env, r1 + 1, destlen); + set_length(env, r3 + 1, srclen); + set_address(env, r1, dest); + set_address(env, r3, src); + + return cc; +} + +/* compare logical long helper */ +static inline uint32_t do_clcl(CPUS390XState *env, + uint64_t *src1, uint64_t *src1len, + uint64_t *src3, uint64_t *src3len, + uint16_t pad, uint64_t limit, + int wordsize, uintptr_t ra) +{ + uint64_t len = MAX(*src1len, *src3len); + uint32_t cc = 0; + + check_alignment(env, *src1len | *src3len, wordsize, ra); + + if (!len) { + return cc; + } + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. */ + if (len > limit) { + len = limit; + cc = 3; + } + + for (; len; len -= wordsize) { + uint16_t v1 = pad; + uint16_t v3 = pad; + + if (*src1len) { + v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra); + } + if (*src3len) { + v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra); + } + + if (v1 != v3) { + cc = (v1 < v3) ? 
1 : 2; + break; + } + + if (*src1len) { + *src1 += wordsize; + *src1len -= wordsize; + } + if (*src3len) { + *src3 += wordsize; + *src3len -= wordsize; + } + } + + return cc; +} + + +/* compare logical long */ +uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + uintptr_t ra = GETPC(); + uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24); + uint64_t src1 = get_address(env, r1); + uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24); + uint64_t src3 = get_address(env, r2); + uint8_t pad = env->regs[r2 + 1] >> 24; + uint32_t cc; + + cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra); + + env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len); + env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len); + set_address(env, r1, src1); + set_address(env, r2, src3); + + return cc; +} + +/* compare logical long extended memcompare insn with padding */ +uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2, + uint32_t r3) +{ + uintptr_t ra = GETPC(); + uint64_t src1len = get_length(env, r1 + 1); + uint64_t src1 = get_address(env, r1); + uint64_t src3len = get_length(env, r3 + 1); + uint64_t src3 = get_address(env, r3); + uint8_t pad = a2; + uint32_t cc; + + cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra); + + set_length(env, r1 + 1, src1len); + set_length(env, r3 + 1, src3len); + set_address(env, r1, src1); + set_address(env, r3, src3); + + return cc; +} + +/* compare logical long unicode memcompare insn with padding */ +uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2, + uint32_t r3) +{ + uintptr_t ra = GETPC(); + uint64_t src1len = get_length(env, r1 + 1); + uint64_t src1 = get_address(env, r1); + uint64_t src3len = get_length(env, r3 + 1); + uint64_t src3 = get_address(env, r3); + uint16_t pad = a2; + uint32_t cc = 0; + + cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra); + + set_length(env, r1 + 1, src1len); + set_length(env, r3 + 1, src3len); + set_address(env, r1, src1); + set_address(env, r3, src3); + + return cc; +} + +/* checksum */ +uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1, + uint64_t src, uint64_t src_len) +{ + uintptr_t ra = GETPC(); + uint64_t max_len, len; + uint64_t cksm = (uint32_t)r1; + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. For now, let's cap at 8k. */ + max_len = (src_len > 0x2000 ? 0x2000 : src_len); + + /* Process full words as available. */ + for (len = 0; len + 4 <= max_len; len += 4, src += 4) { + cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra); + } + + switch (max_len - len) { + case 1: + cksm += cpu_ldub_data_ra(env, src, ra) << 24; + len += 1; + break; + case 2: + cksm += cpu_lduw_data_ra(env, src, ra) << 16; + len += 2; + break; + case 3: + cksm += cpu_lduw_data_ra(env, src, ra) << 16; + cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8; + len += 3; + break; + } + + /* Fold the carry from the checksum. Note that we can see carry-out + during folding more than once (but probably not more than twice). */ + while (cksm > 0xffffffffull) { + cksm = (uint32_t)cksm + (cksm >> 32); + } + + /* Indicate whether or not we've processed everything. */ + env->cc_op = (len == src_len ? 0 : 3); + + /* Return both cksm and processed length. 
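+ (The checksum travels via env->retxl and the byte count via the
+ return value; the translator is then expected to update r1 and the
+ r2 register pair from them.)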
*/ + env->retxl = cksm; + return len; +} + +void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src) +{ + uintptr_t ra = GETPC(); + int len_dest = len >> 4; + int len_src = len & 0xf; + uint8_t b; + + dest += len_dest; + src += len_src; + + /* last byte is special, it only flips the nibbles */ + b = cpu_ldub_data_ra(env, src, ra); + cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra); + src--; + len_src--; + + /* now pack every value */ + while (len_dest > 0) { + b = 0; + + if (len_src >= 0) { + b = cpu_ldub_data_ra(env, src, ra) & 0x0f; + src--; + len_src--; + } + if (len_src >= 0) { + b |= cpu_ldub_data_ra(env, src, ra) << 4; + src--; + len_src--; + } + + len_dest--; + dest--; + cpu_stb_data_ra(env, dest, b, ra); + } +} + +static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src, + uint32_t srclen, int ssize, uintptr_t ra) +{ + int i; + /* The destination operand is always 16 bytes long. */ + const int destlen = 16; + + /* The operands are processed from right to left. */ + src += srclen - 1; + dest += destlen - 1; + + for (i = 0; i < destlen; i++) { + uint8_t b = 0; + + /* Start with a positive sign */ + if (i == 0) { + b = 0xc; + } else if (srclen > ssize) { + b = cpu_ldub_data_ra(env, src, ra) & 0x0f; + src -= ssize; + srclen -= ssize; + } + + if (srclen > ssize) { + b |= cpu_ldub_data_ra(env, src, ra) << 4; + src -= ssize; + srclen -= ssize; + } + + cpu_stb_data_ra(env, dest, b, ra); + dest--; + } +} + + +void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src, + uint32_t srclen) +{ + do_pkau(env, dest, src, srclen, 1, GETPC()); +} + +void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src, + uint32_t srclen) +{ + do_pkau(env, dest, src, srclen, 2, GETPC()); +} + +void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest, + uint64_t src) +{ + uintptr_t ra = GETPC(); + int len_dest = len >> 4; + int len_src = len & 0xf; + uint8_t b; + int second_nibble = 0; + + dest += len_dest; + src += len_src; + + /* last byte is special, it only flips the nibbles */ + b = cpu_ldub_data_ra(env, src, ra); + cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra); + src--; + len_src--; + + /* now pad every nibble with 0xf0 */ + + while (len_dest > 0) { + uint8_t cur_byte = 0; + + if (len_src > 0) { + cur_byte = cpu_ldub_data_ra(env, src, ra); + } + + len_dest--; + dest--; + + /* only advance one nibble at a time */ + if (second_nibble) { + cur_byte >>= 4; + len_src--; + src--; + } + second_nibble = !second_nibble; + + /* digit */ + cur_byte = (cur_byte & 0xf); + /* zone bits */ + cur_byte |= 0xf0; + + cpu_stb_data_ra(env, dest, cur_byte, ra); + } +} + +static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest, + uint32_t destlen, int dsize, uint64_t src, + uintptr_t ra) +{ + int i; + uint32_t cc; + uint8_t b; + /* The source operand is always 16 bytes long. */ + const int srclen = 16; + + /* The operands are processed from right to left. */ + src += srclen - 1; + dest += destlen - dsize; + + /* Check for the sign. */ + b = cpu_ldub_data_ra(env, src, ra); + src--; + switch (b & 0xf) { + case 0xa: + case 0xc: + case 0xe ... 0xf: + cc = 0; /* plus */ + break; + case 0xb: + case 0xd: + cc = 1; /* minus */ + break; + default: + case 0x0 ... 0x9: + cc = 3; /* invalid */ + break; + } + + /* Now pad every nibble with 0x30, advancing one nibble at a time. */ + for (i = 0; i < destlen; i += dsize) { + if (i == (31 * dsize)) { + /* If length is 32/64 bytes, the leftmost byte is 0. 
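+ (The 16-byte source holds only 31 digits plus the sign, so e.g.
+ UNPKA with a 32-byte first operand stores 0x30 in that byte.)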
*/ + b = 0; + } else if (i % (2 * dsize)) { + b = cpu_ldub_data_ra(env, src, ra); + src--; + } else { + b >>= 4; + } + cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra); + dest -= dsize; + } + + return cc; +} + +uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen, + uint64_t src) +{ + return do_unpkau(env, dest, destlen, 1, src, GETPC()); +} + +uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen, + uint64_t src) +{ + return do_unpkau(env, dest, destlen, 2, src, GETPC()); +} + +uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen) +{ + uintptr_t ra = GETPC(); + uint32_t cc = 0; + int i; + + for (i = 0; i < destlen; i++) { + uint8_t b = cpu_ldub_data_ra(env, dest + i, ra); + /* digit */ + cc |= (b & 0xf0) > 0x90 ? 2 : 0; + + if (i == (destlen - 1)) { + /* sign */ + cc |= (b & 0xf) < 0xa ? 1 : 0; + } else { + /* digit */ + cc |= (b & 0xf) > 0x9 ? 2 : 0; + } + } + + return cc; +} + +static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array, + uint64_t trans, uintptr_t ra) +{ + uint32_t i; + + for (i = 0; i <= len; i++) { + uint8_t byte = cpu_ldub_data_ra(env, array + i, ra); + uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra); + cpu_stb_data_ra(env, array + i, new_byte, ra); + } + + return env->cc_op; +} + +void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array, + uint64_t trans) +{ + do_helper_tr(env, len, array, trans, GETPC()); +} + +uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array, + uint64_t len, uint64_t trans) +{ + uintptr_t ra = GETPC(); + uint8_t end = env->regs[0] & 0xff; + uint64_t l = len; + uint64_t i; + uint32_t cc = 0; + + if (!(env->psw.mask & PSW_MASK_64)) { + array &= 0x7fffffff; + l = (uint32_t)l; + } + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. For now, let's cap at 8k. */ + if (l > 0x2000) { + l = 0x2000; + cc = 3; + } + + for (i = 0; i < l; i++) { + uint8_t byte, new_byte; + + byte = cpu_ldub_data_ra(env, array + i, ra); + + if (byte == end) { + cc = 1; + break; + } + + new_byte = cpu_ldub_data_ra(env, trans + byte, ra); + cpu_stb_data_ra(env, array + i, new_byte, ra); + } + + env->cc_op = cc; + env->retxl = len - i; + return array + i; +} + +static inline uint32_t do_helper_trt(CPUS390XState *env, int len, + uint64_t array, uint64_t trans, + int inc, uintptr_t ra) +{ + int i; + + for (i = 0; i <= len; i++) { + uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra); + uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra); + + if (sbyte != 0) { + set_address(env, 1, array + i * inc); + env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte); + return (i == len) ? 
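+                   /* a hit on the final byte yields cc 2, earlier cc 1 */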
2 : 1; + } + } + + return 0; +} + +static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len, + uint64_t array, uint64_t trans, + uintptr_t ra) +{ + return do_helper_trt(env, len, array, trans, 1, ra); +} + +uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array, + uint64_t trans) +{ + return do_helper_trt(env, len, array, trans, 1, GETPC()); +} + +static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len, + uint64_t array, uint64_t trans, + uintptr_t ra) +{ + return do_helper_trt(env, len, array, trans, -1, ra); +} + +uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array, + uint64_t trans) +{ + return do_helper_trt(env, len, array, trans, -1, GETPC()); +} + +/* Translate one/two to one/two */ +uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2, + uint32_t tst, uint32_t sizes) +{ + uintptr_t ra = GETPC(); + int dsize = (sizes & 1) ? 1 : 2; + int ssize = (sizes & 2) ? 1 : 2; + uint64_t tbl = get_address(env, 1); + uint64_t dst = get_address(env, r1); + uint64_t len = get_length(env, r1 + 1); + uint64_t src = get_address(env, r2); + uint32_t cc = 3; + int i; + + /* The lower address bits of TBL are ignored. For TROO, TROT, it's + the low 3 bits (double-word aligned). For TRTO, TRTT, it's either + the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH). */ + if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) { + tbl &= -4096; + } else { + tbl &= -8; + } + + check_alignment(env, len, ssize, ra); + + /* Lest we fail to service interrupts in a timely manner, */ + /* limit the amount of work we're willing to do. */ + for (i = 0; i < 0x2000; i++) { + uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra); + uint64_t tble = tbl + (sval * dsize); + uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra); + if (dval == tst) { + cc = 1; + break; + } + cpu_stsize_data_ra(env, dst, dval, dsize, ra); + + len -= ssize; + src += ssize; + dst += dsize; + + if (len == 0) { + cc = 0; + break; + } + } + + set_address(env, r1, dst); + set_length(env, r1 + 1, len); + set_address(env, r2, src); + + return cc; +} + +void HELPER(cdsg)(CPUS390XState *env, uint64_t addr, + uint32_t r1, uint32_t r3) +{ + uintptr_t ra = GETPC(); + Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); + Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); + Int128 oldv; + uint64_t oldh, oldl; + bool fail; + + check_alignment(env, addr, 16, ra); + + oldh = cpu_ldq_data_ra(env, addr + 0, ra); + oldl = cpu_ldq_data_ra(env, addr + 8, ra); + + oldv = int128_make128(oldl, oldh); + fail = !int128_eq(oldv, cmpv); + if (fail) { + newv = oldv; + } + + cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra); + cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra); + + env->cc_op = fail; + env->regs[r1] = int128_gethi(oldv); + env->regs[r1 + 1] = int128_getlo(oldv); +} + +void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr, + uint32_t r1, uint32_t r3) +{ + uintptr_t ra = GETPC(); + Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]); + Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]); + int mem_idx; + TCGMemOpIdx oi; + Int128 oldv; + bool fail; + + assert(HAVE_CMPXCHG128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + fail = !int128_eq(oldv, cmpv); + + env->cc_op = fail; + env->regs[r1] = int128_gethi(oldv); + env->regs[r1 + 1] = int128_getlo(oldv); +} + +static uint32_t do_csst(CPUS390XState 
*env, uint32_t r3, uint64_t a1, + uint64_t a2, bool parallel) +{ + uint32_t mem_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + uint32_t fc = extract32(env->regs[0], 0, 8); + uint32_t sc = extract32(env->regs[0], 8, 8); + uint64_t pl = get_address(env, 1) & -16; + uint64_t svh, svl; + uint32_t cc; + + /* Sanity check the function code and storage characteristic. */ + if (fc > 1 || sc > 3) { + if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) { + goto spec_exception; + } + if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) { + goto spec_exception; + } + } + + /* Sanity check the alignments. */ + if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) { + goto spec_exception; + } + + /* Sanity check writability of the store address. */ + probe_write(env, a2, 1 << sc, mem_idx, ra); + + /* + * Note that the compare-and-swap is atomic, and the store is atomic, + * but the complete operation is not. Therefore we do not need to + * assert serial context in order to implement this. That said, + * restart early if we can't support either operation that is supposed + * to be atomic. + */ + if (parallel) { + uint32_t max = 2; +#ifdef CONFIG_ATOMIC64 + max = 3; +#endif + if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) || + (HAVE_ATOMIC128 ? 0 : sc > max)) { + cpu_loop_exit_atomic(env_cpu(env), ra); + } + } + + /* All loads happen before all stores. For simplicity, load the entire + store value area from the parameter list. */ + svh = cpu_ldq_data_ra(env, pl + 16, ra); + svl = cpu_ldq_data_ra(env, pl + 24, ra); + + switch (fc) { + case 0: + { + uint32_t nv = cpu_ldl_data_ra(env, pl, ra); + uint32_t cv = env->regs[r3]; + uint32_t ov; + + if (parallel) { +#ifdef CONFIG_USER_ONLY + uint32_t *haddr = g2h(env_cpu(env), a1); + ov = qatomic_cmpxchg__nocheck(haddr, cv, nv); +#else + TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx); + ov = helper_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra); +#endif + } else { + ov = cpu_ldl_data_ra(env, a1, ra); + cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra); + } + cc = (ov != cv); + env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov); + } + break; + + case 1: + { + uint64_t nv = cpu_ldq_data_ra(env, pl, ra); + uint64_t cv = env->regs[r3]; + uint64_t ov; + + if (parallel) { +#ifdef CONFIG_ATOMIC64 +# ifdef CONFIG_USER_ONLY + uint64_t *haddr = g2h(env_cpu(env), a1); + ov = qatomic_cmpxchg__nocheck(haddr, cv, nv); +# else + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx); + ov = helper_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra); +# endif +#else + /* Note that we asserted !parallel above. */ + g_assert_not_reached(); +#endif + } else { + ov = cpu_ldq_data_ra(env, a1, ra); + cpu_stq_data_ra(env, a1, (ov == cv ? 
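+                                /* on match store new, else re-store old */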
nv : ov), ra); + } + cc = (ov != cv); + env->regs[r3] = ov; + } + break; + + case 2: + { + uint64_t nvh = cpu_ldq_data_ra(env, pl, ra); + uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra); + Int128 nv = int128_make128(nvl, nvh); + Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]); + Int128 ov; + + if (!parallel) { + uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra); + uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra); + + ov = int128_make128(ol, oh); + cc = !int128_eq(ov, cv); + if (cc) { + nv = ov; + } + + cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra); + cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra); + } else if (HAVE_CMPXCHG128) { + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra); + cc = !int128_eq(ov, cv); + } else { + /* Note that we asserted !parallel above. */ + g_assert_not_reached(); + } + + env->regs[r3 + 0] = int128_gethi(ov); + env->regs[r3 + 1] = int128_getlo(ov); + } + break; + + default: + g_assert_not_reached(); + } + + /* Store only if the comparison succeeded. Note that above we use a pair + of 64-bit big-endian loads, so for sc < 3 we must extract the value + from the most-significant bits of svh. */ + if (cc == 0) { + switch (sc) { + case 0: + cpu_stb_data_ra(env, a2, svh >> 56, ra); + break; + case 1: + cpu_stw_data_ra(env, a2, svh >> 48, ra); + break; + case 2: + cpu_stl_data_ra(env, a2, svh >> 32, ra); + break; + case 3: + cpu_stq_data_ra(env, a2, svh, ra); + break; + case 4: + if (!parallel) { + cpu_stq_data_ra(env, a2 + 0, svh, ra); + cpu_stq_data_ra(env, a2 + 8, svl, ra); + } else if (HAVE_ATOMIC128) { + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + Int128 sv = int128_make128(svl, svh); + helper_atomic_sto_be_mmu(env, a2, sv, oi, ra); + } else { + /* Note that we asserted !parallel above. 
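+                   Without 16-byte atomic stores, the parallel path already
+                   bailed out to cpu_loop_exit_atomic() before touching
+                   memory, so this branch is unreachable.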
*/ + g_assert_not_reached(); + } + break; + default: + g_assert_not_reached(); + } + } + + return cc; + + spec_exception: + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); +} + +uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2) +{ + return do_csst(env, r3, a1, a2, false); +} + +uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1, + uint64_t a2) +{ + return do_csst(env, r3, a1, a2, true); +} + +#if !defined(CONFIG_USER_ONLY) +void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) +{ + uintptr_t ra = GETPC(); + bool PERchanged = false; + uint64_t src = a2; + uint32_t i; + + if (src & 0x7) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + for (i = r1;; i = (i + 1) % 16) { + uint64_t val = cpu_ldq_data_ra(env, src, ra); + if (env->cregs[i] != val && i >= 9 && i <= 11) { + PERchanged = true; + } + env->cregs[i] = val; + HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n", + i, src, val); + src += sizeof(uint64_t); + + if (i == r3) { + break; + } + } + + if (PERchanged && env->psw.mask & PSW_MASK_PER) { + s390_cpu_recompute_watchpoints(env_cpu(env)); + } + + tlb_flush(env_cpu(env)); +} + +void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) +{ + uintptr_t ra = GETPC(); + bool PERchanged = false; + uint64_t src = a2; + uint32_t i; + + if (src & 0x3) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + for (i = r1;; i = (i + 1) % 16) { + uint32_t val = cpu_ldl_data_ra(env, src, ra); + if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) { + PERchanged = true; + } + env->cregs[i] = deposit64(env->cregs[i], 0, 32, val); + HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val); + src += sizeof(uint32_t); + + if (i == r3) { + break; + } + } + + if (PERchanged && env->psw.mask & PSW_MASK_PER) { + s390_cpu_recompute_watchpoints(env_cpu(env)); + } + + tlb_flush(env_cpu(env)); +} + +void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) +{ + uintptr_t ra = GETPC(); + uint64_t dest = a2; + uint32_t i; + + if (dest & 0x7) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + for (i = r1;; i = (i + 1) % 16) { + cpu_stq_data_ra(env, dest, env->cregs[i], ra); + dest += sizeof(uint64_t); + + if (i == r3) { + break; + } + } +} + +void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3) +{ + uintptr_t ra = GETPC(); + uint64_t dest = a2; + uint32_t i; + + if (dest & 0x3) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + for (i = r1;; i = (i + 1) % 16) { + cpu_stl_data_ra(env, dest, env->cregs[i], ra); + dest += sizeof(uint32_t); + + if (i == r3) { + break; + } + } +} + +uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr) +{ + uintptr_t ra = GETPC(); + int i; + + real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK; + + for (i = 0; i < TARGET_PAGE_SIZE; i += 8) { + cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra); + } + + return 0; +} + +uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2) +{ + S390CPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); + + /* + * TODO: we currently don't handle all access protection types + * (including access-list and key-controlled) as well as AR mode. 
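+     * Instead we approximate: probe a one-byte store and, if that raises
+     * protection, retry with a one-byte fetch; the cc values below fall
+     * out of those two probes.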
+ */ + if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) { + /* Fetching permitted; storing permitted */ + return 0; + } + + if (env->int_pgm_code == PGM_PROTECTION) { + /* retry if reading is possible */ + cs->exception_index = -1; + if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) { + /* Fetching permitted; storing not permitted */ + return 1; + } + } + + switch (env->int_pgm_code) { + case PGM_PROTECTION: + /* Fetching not permitted; storing not permitted */ + cs->exception_index = -1; + return 2; + case PGM_ADDRESSING: + case PGM_TRANS_SPEC: + /* exceptions forwarded to the guest */ + s390_cpu_virt_mem_handle_exc(cpu, GETPC()); + return 0; + } + + /* Translation not available */ + cs->exception_index = -1; + return 3; +} + +/* insert storage key extended */ +uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + static S390SKeysState *ss; + static S390SKeysClass *skeyclass; + uint64_t addr = wrap_address(env, r2); + uint8_t key; + + if (addr > ms->ram_size) { + return 0; + } + + if (unlikely(!ss)) { + ss = s390_get_skeys_device(); + skeyclass = S390_SKEYS_GET_CLASS(ss); + } + + if (skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key)) { + return 0; + } + return key; +} + +/* set storage key extended */ +void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + static S390SKeysState *ss; + static S390SKeysClass *skeyclass; + uint64_t addr = wrap_address(env, r2); + uint8_t key; + + if (addr > ms->ram_size) { + return; + } + + if (unlikely(!ss)) { + ss = s390_get_skeys_device(); + skeyclass = S390_SKEYS_GET_CLASS(ss); + } + + key = (uint8_t) r1; + skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key); + /* + * As we can only flush by virtual address and not all the entries + * that point to a physical address we have to flush the whole TLB. + */ + tlb_flush_all_cpus_synced(env_cpu(env)); +} + +/* reset reference bit extended */ +uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + static S390SKeysState *ss; + static S390SKeysClass *skeyclass; + uint8_t re, key; + + if (r2 > ms->ram_size) { + return 0; + } + + if (unlikely(!ss)) { + ss = s390_get_skeys_device(); + skeyclass = S390_SKEYS_GET_CLASS(ss); + } + + if (skeyclass->get_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) { + return 0; + } + + re = key & (SK_R | SK_C); + key &= ~SK_R; + + if (skeyclass->set_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) { + return 0; + } + /* + * As we can only flush by virtual address and not all the entries + * that point to a physical address we have to flush the whole TLB. 
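+     * Storage keys are per absolute page, while TLB entries are keyed by
+     * virtual address, so there is no precise invalidation to be had.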
+ */ + tlb_flush_all_cpus_synced(env_cpu(env)); + + /* + * cc + * + * 0 Reference bit zero; change bit zero + * 1 Reference bit zero; change bit one + * 2 Reference bit one; change bit zero + * 3 Reference bit one; change bit one + */ + + return re >> 1; +} + +uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) +{ + const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC; + S390Access srca, desta; + uintptr_t ra = GETPC(); + int cc = 0; + + HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n", + __func__, l, a1, a2); + + if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) || + psw_as == AS_HOME || psw_as == AS_ACCREG) { + s390_program_interrupt(env, PGM_SPECIAL_OP, ra); + } + + l = wrap_length32(env, l); + if (l > 256) { + /* max 256 */ + l = 256; + cc = 3; + } else if (!l) { + return cc; + } + + /* TODO: Access key handling */ + srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra); + desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra); + access_memmove(env, &desta, &srca, ra); + return cc; +} + +uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2) +{ + const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC; + S390Access srca, desta; + uintptr_t ra = GETPC(); + int cc = 0; + + HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n", + __func__, l, a1, a2); + + if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) || + psw_as == AS_HOME || psw_as == AS_ACCREG) { + s390_program_interrupt(env, PGM_SPECIAL_OP, ra); + } + + l = wrap_length32(env, l); + if (l > 256) { + /* max 256 */ + l = 256; + cc = 3; + } else if (!l) { + return cc; + } + + /* TODO: Access key handling */ + srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra); + desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra); + access_memmove(env, &desta, &srca, ra); + return cc; +} + +void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4) +{ + CPUState *cs = env_cpu(env); + const uintptr_t ra = GETPC(); + uint64_t table, entry, raddr; + uint16_t entries, i, index = 0; + + if (r2 & 0xff000) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + if (!(r2 & 0x800)) { + /* invalidation-and-clearing operation */ + table = r1 & ASCE_ORIGIN; + entries = (r2 & 0x7ff) + 1; + + switch (r1 & ASCE_TYPE_MASK) { + case ASCE_TYPE_REGION1: + index = (r2 >> 53) & 0x7ff; + break; + case ASCE_TYPE_REGION2: + index = (r2 >> 42) & 0x7ff; + break; + case ASCE_TYPE_REGION3: + index = (r2 >> 31) & 0x7ff; + break; + case ASCE_TYPE_SEGMENT: + index = (r2 >> 20) & 0x7ff; + break; + } + for (i = 0; i < entries; i++) { + /* addresses are not wrapped in 24/31bit mode but table index is */ + raddr = table + ((index + i) & 0x7ff) * sizeof(entry); + entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra); + if (!(entry & REGION_ENTRY_I)) { + /* we are allowed to not store if already invalid */ + entry |= REGION_ENTRY_I; + cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra); + } + } + } + + /* We simply flush the complete tlb, therefore we can ignore r3. 
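+       Only the local-clearing bit of m4 matters below: when set, a
+       CPU-local flush suffices; otherwise all CPUs flush synchronously.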
*/ + if (m4 & 1) { + tlb_flush(cs); + } else { + tlb_flush_all_cpus_synced(cs); + } +} + +/* invalidate pte */ +void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr, + uint32_t m4) +{ + CPUState *cs = env_cpu(env); + const uintptr_t ra = GETPC(); + uint64_t page = vaddr & TARGET_PAGE_MASK; + uint64_t pte_addr, pte; + + /* Compute the page table entry address */ + pte_addr = (pto & SEGMENT_ENTRY_ORIGIN); + pte_addr += VADDR_PAGE_TX(vaddr) * 8; + + /* Mark the page table entry as invalid */ + pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra); + pte |= PAGE_ENTRY_I; + cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra); + + /* XXX we exploit the fact that Linux passes the exact virtual + address here - it's not obliged to! */ + if (m4 & 1) { + if (vaddr & ~VADDR_PAGE_TX_MASK) { + tlb_flush_page(cs, page); + /* XXX 31-bit hack */ + tlb_flush_page(cs, page ^ 0x80000000); + } else { + /* looks like we don't have a valid virtual address */ + tlb_flush(cs); + } + } else { + if (vaddr & ~VADDR_PAGE_TX_MASK) { + tlb_flush_page_all_cpus_synced(cs, page); + /* XXX 31-bit hack */ + tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000); + } else { + /* looks like we don't have a valid virtual address */ + tlb_flush_all_cpus_synced(cs); + } + } +} + +/* flush local tlb */ +void HELPER(ptlb)(CPUS390XState *env) +{ + tlb_flush(env_cpu(env)); +} + +/* flush global tlb */ +void HELPER(purge)(CPUS390XState *env) +{ + tlb_flush_all_cpus_synced(env_cpu(env)); +} + +/* load real address */ +uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr) +{ + uint64_t asc = env->psw.mask & PSW_MASK_ASC; + uint64_t ret, tec; + int flags, exc, cc; + + /* XXX incomplete - has more corner cases */ + if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) { + tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC()); + } + + exc = mmu_translate(env, addr, 0, asc, &ret, &flags, &tec); + if (exc) { + cc = 3; + ret = exc | 0x80000000; + } else { + cc = 0; + ret |= addr & ~TARGET_PAGE_MASK; + } + + env->cc_op = cc; + return ret; +} +#endif + +/* load pair from quadword */ +uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr) +{ + uintptr_t ra = GETPC(); + uint64_t hi, lo; + + check_alignment(env, addr, 16, ra); + hi = cpu_ldq_data_ra(env, addr + 0, ra); + lo = cpu_ldq_data_ra(env, addr + 8, ra); + + env->retxl = lo; + return hi; +} + +uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr) +{ + uintptr_t ra = GETPC(); + uint64_t hi, lo; + int mem_idx; + TCGMemOpIdx oi; + Int128 v; + + assert(HAVE_ATOMIC128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + v = helper_atomic_ldo_be_mmu(env, addr, oi, ra); + hi = int128_gethi(v); + lo = int128_getlo(v); + + env->retxl = lo; + return hi; +} + +/* store pair to quadword */ +void HELPER(stpq)(CPUS390XState *env, uint64_t addr, + uint64_t low, uint64_t high) +{ + uintptr_t ra = GETPC(); + + check_alignment(env, addr, 16, ra); + cpu_stq_data_ra(env, addr + 0, high, ra); + cpu_stq_data_ra(env, addr + 8, low, ra); +} + +void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr, + uint64_t low, uint64_t high) +{ + uintptr_t ra = GETPC(); + int mem_idx; + TCGMemOpIdx oi; + Int128 v; + + assert(HAVE_ATOMIC128); + + mem_idx = cpu_mmu_index(env, false); + oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + v = int128_make128(low, high); + helper_atomic_sto_be_mmu(env, addr, v, oi, ra); +} + +/* Execute instruction. This instruction executes an insn modified with + the contents of r1. 
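+   (Specifically, bits 56..63 of r1 are OR'ed into the instruction's
+   second byte, as done below.)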
It does not change the executed instruction in memory; + it does not change the program counter. + + Perform this by recording the modified instruction in env->ex_value. + This will be noticed by cpu_get_tb_cpu_state and thus tb translation. +*/ +void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr) +{ + uint64_t insn = cpu_lduw_code(env, addr); + uint8_t opc = insn >> 8; + + /* Or in the contents of R1[56:63]. */ + insn |= r1 & 0xff; + + /* Load the rest of the instruction. */ + insn <<= 48; + switch (get_ilen(opc)) { + case 2: + break; + case 4: + insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32; + break; + case 6: + insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16; + break; + default: + g_assert_not_reached(); + } + + /* The very most common cases can be sped up by avoiding a new TB. */ + if ((opc & 0xf0) == 0xd0) { + typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t, + uint64_t, uintptr_t); + static const dx_helper dx[16] = { + [0x0] = do_helper_trt_bkwd, + [0x2] = do_helper_mvc, + [0x4] = do_helper_nc, + [0x5] = do_helper_clc, + [0x6] = do_helper_oc, + [0x7] = do_helper_xc, + [0xc] = do_helper_tr, + [0xd] = do_helper_trt_fwd, + }; + dx_helper helper = dx[opc & 0xf]; + + if (helper) { + uint32_t l = extract64(insn, 48, 8); + uint32_t b1 = extract64(insn, 44, 4); + uint32_t d1 = extract64(insn, 32, 12); + uint32_t b2 = extract64(insn, 28, 4); + uint32_t d2 = extract64(insn, 16, 12); + uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1); + uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2); + + env->cc_op = helper(env, l, a1, a2, 0); + env->psw.addr += ilen; + return; + } + } else if (opc == 0x0a) { + env->int_svc_code = extract64(insn, 48, 8); + env->int_svc_ilen = ilen; + helper_exception(env, EXCP_SVC); + g_assert_not_reached(); + } + + /* Record the insn we want to execute as well as the ilen to use + during the execution of the target insn. This will also ensure + that ex_value is non-zero, which flags that we are in a state + that requires such execution. 
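+   Since the left-justified insn leaves the low bits clear and ilen is
+   only ever 2, 4 or 6, both pack losslessly into a single word.  A
+   hedged sketch of the inverse (illustrative only):
+
+       uint64_t insn = env->ex_value & ~0xfULL;   // left-justified insn
+       uint32_t ilen = env->ex_value & 0xf;       // 2, 4 or 6
+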
*/ + env->ex_value = insn | ilen; +} + +uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src, + uint64_t len) +{ + const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY; + const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC; + const uint64_t r0 = env->regs[0]; + const uintptr_t ra = GETPC(); + uint8_t dest_key, dest_as, dest_k, dest_a; + uint8_t src_key, src_as, src_k, src_a; + uint64_t val; + int cc = 0; + + HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n", + __func__, dest, src, len); + + if (!(env->psw.mask & PSW_MASK_DAT)) { + tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra); + } + + /* OAC (operand access control) for the first operand -> dest */ + val = (r0 & 0xffff0000ULL) >> 16; + dest_key = (val >> 12) & 0xf; + dest_as = (val >> 6) & 0x3; + dest_k = (val >> 1) & 0x1; + dest_a = val & 0x1; + + /* OAC (operand access control) for the second operand -> src */ + val = (r0 & 0x0000ffffULL); + src_key = (val >> 12) & 0xf; + src_as = (val >> 6) & 0x3; + src_k = (val >> 1) & 0x1; + src_a = val & 0x1; + + if (!dest_k) { + dest_key = psw_key; + } + if (!src_k) { + src_key = psw_key; + } + if (!dest_a) { + dest_as = psw_as; + } + if (!src_a) { + src_as = psw_as; + } + + if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) { + tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra); + } + if (!(env->cregs[0] & CR0_SECONDARY) && + (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) { + tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra); + } + if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) { + tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra); + } + + len = wrap_length32(env, len); + if (len > 4096) { + cc = 3; + len = 4096; + } + + /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */ + if (src_as == AS_ACCREG || dest_as == AS_ACCREG || + (env->psw.mask & PSW_MASK_PSTATE)) { + qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n", + __func__); + tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra); + } + + /* FIXME: Access using correct keys and AR-mode */ + if (len) { + S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD, + mmu_idx_from_as(src_as), ra); + S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE, + mmu_idx_from_as(dest_as), ra); + + access_memmove(env, &desta, &srca, ra); + } + + return cc; +} + +/* Decode a Unicode character. A return value < 0 indicates success, storing + the UTF-32 result into OCHAR and the input length into OLEN. A return + value >= 0 indicates failure, and the CC value to be returned. */ +typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr, + uint64_t ilen, bool enh_check, uintptr_t ra, + uint32_t *ochar, uint32_t *olen); + +/* Encode a Unicode character. A return value < 0 indicates success, storing + the bytes into ADDR and the output length into OLEN. A return value >= 0 + indicates failure, and the CC value to be returned. */ +typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr, + uint64_t ilen, uintptr_t ra, uint32_t c, + uint32_t *olen); + +static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen, + bool enh_check, uintptr_t ra, + uint32_t *ochar, uint32_t *olen) +{ + uint8_t s0, s1, s2, s3; + uint32_t c, l; + + if (ilen < 1) { + return 0; + } + s0 = cpu_ldub_data_ra(env, addr, ra); + if (s0 <= 0x7f) { + /* one byte character */ + l = 1; + c = s0; + } else if (s0 <= (enh_check ? 
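+                  /* 0xc0/0xc1 can only start overlong sequences */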
0xc1 : 0xbf)) { + /* invalid character */ + return 2; + } else if (s0 <= 0xdf) { + /* two byte character */ + l = 2; + if (ilen < 2) { + return 0; + } + s1 = cpu_ldub_data_ra(env, addr + 1, ra); + c = s0 & 0x1f; + c = (c << 6) | (s1 & 0x3f); + if (enh_check && (s1 & 0xc0) != 0x80) { + return 2; + } + } else if (s0 <= 0xef) { + /* three byte character */ + l = 3; + if (ilen < 3) { + return 0; + } + s1 = cpu_ldub_data_ra(env, addr + 1, ra); + s2 = cpu_ldub_data_ra(env, addr + 2, ra); + c = s0 & 0x0f; + c = (c << 6) | (s1 & 0x3f); + c = (c << 6) | (s2 & 0x3f); + /* Fold the byte-by-byte range descriptions in the PoO into + tests against the complete value. It disallows encodings + that could be smaller, and the UTF-16 surrogates. */ + if (enh_check + && ((s1 & 0xc0) != 0x80 + || (s2 & 0xc0) != 0x80 + || c < 0x1000 + || (c >= 0xd800 && c <= 0xdfff))) { + return 2; + } + } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) { + /* four byte character */ + l = 4; + if (ilen < 4) { + return 0; + } + s1 = cpu_ldub_data_ra(env, addr + 1, ra); + s2 = cpu_ldub_data_ra(env, addr + 2, ra); + s3 = cpu_ldub_data_ra(env, addr + 3, ra); + c = s0 & 0x07; + c = (c << 6) | (s1 & 0x3f); + c = (c << 6) | (s2 & 0x3f); + c = (c << 6) | (s3 & 0x3f); + /* See above. */ + if (enh_check + && ((s1 & 0xc0) != 0x80 + || (s2 & 0xc0) != 0x80 + || (s3 & 0xc0) != 0x80 + || c < 0x010000 + || c > 0x10ffff)) { + return 2; + } + } else { + /* invalid character */ + return 2; + } + + *ochar = c; + *olen = l; + return -1; +} + +static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen, + bool enh_check, uintptr_t ra, + uint32_t *ochar, uint32_t *olen) +{ + uint16_t s0, s1; + uint32_t c, l; + + if (ilen < 2) { + return 0; + } + s0 = cpu_lduw_data_ra(env, addr, ra); + if ((s0 & 0xfc00) != 0xd800) { + /* one word character */ + l = 2; + c = s0; + } else { + /* two word character */ + l = 4; + if (ilen < 4) { + return 0; + } + s1 = cpu_lduw_data_ra(env, addr + 2, ra); + c = extract32(s0, 6, 4) + 1; + c = (c << 6) | (s0 & 0x3f); + c = (c << 10) | (s1 & 0x3ff); + if (enh_check && (s1 & 0xfc00) != 0xdc00) { + /* invalid surrogate character */ + return 2; + } + } + + *ochar = c; + *olen = l; + return -1; +} + +static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen, + bool enh_check, uintptr_t ra, + uint32_t *ochar, uint32_t *olen) +{ + uint32_t c; + + if (ilen < 4) { + return 0; + } + c = cpu_ldl_data_ra(env, addr, ra); + if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) { + /* invalid unicode character */ + return 2; + } + + *ochar = c; + *olen = 4; + return -1; +} + +static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen, + uintptr_t ra, uint32_t c, uint32_t *olen) +{ + uint8_t d[4]; + uint32_t l, i; + + if (c <= 0x7f) { + /* one byte character */ + l = 1; + d[0] = c; + } else if (c <= 0x7ff) { + /* two byte character */ + l = 2; + d[1] = 0x80 | extract32(c, 0, 6); + d[0] = 0xc0 | extract32(c, 6, 5); + } else if (c <= 0xffff) { + /* three byte character */ + l = 3; + d[2] = 0x80 | extract32(c, 0, 6); + d[1] = 0x80 | extract32(c, 6, 6); + d[0] = 0xe0 | extract32(c, 12, 4); + } else { + /* four byte character */ + l = 4; + d[3] = 0x80 | extract32(c, 0, 6); + d[2] = 0x80 | extract32(c, 6, 6); + d[1] = 0x80 | extract32(c, 12, 6); + d[0] = 0xf0 | extract32(c, 18, 3); + } + + if (ilen < l) { + return 1; + } + for (i = 0; i < l; ++i) { + cpu_stb_data_ra(env, addr + i, d[i], ra); + } + + *olen = l; + return -1; +} + +static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen, + uintptr_t 
ra, uint32_t c, uint32_t *olen) +{ + uint16_t d0, d1; + + if (c <= 0xffff) { + /* one word character */ + if (ilen < 2) { + return 1; + } + cpu_stw_data_ra(env, addr, c, ra); + *olen = 2; + } else { + /* two word character */ + if (ilen < 4) { + return 1; + } + d1 = 0xdc00 | extract32(c, 0, 10); + d0 = 0xd800 | extract32(c, 10, 6); + d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1); + cpu_stw_data_ra(env, addr + 0, d0, ra); + cpu_stw_data_ra(env, addr + 2, d1, ra); + *olen = 4; + } + + return -1; +} + +static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen, + uintptr_t ra, uint32_t c, uint32_t *olen) +{ + if (ilen < 4) { + return 1; + } + cpu_stl_data_ra(env, addr, c, ra); + *olen = 4; + return -1; +} + +static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1, + uint32_t r2, uint32_t m3, uintptr_t ra, + decode_unicode_fn decode, + encode_unicode_fn encode) +{ + uint64_t dst = get_address(env, r1); + uint64_t dlen = get_length(env, r1 + 1); + uint64_t src = get_address(env, r2); + uint64_t slen = get_length(env, r2 + 1); + bool enh_check = m3 & 1; + int cc, i; + + /* Lest we fail to service interrupts in a timely manner, limit the + amount of work we're willing to do. For now, let's cap at 256. */ + for (i = 0; i < 256; ++i) { + uint32_t c, ilen, olen; + + cc = decode(env, src, slen, enh_check, ra, &c, &ilen); + if (unlikely(cc >= 0)) { + break; + } + cc = encode(env, dst, dlen, ra, c, &olen); + if (unlikely(cc >= 0)) { + break; + } + + src += ilen; + slen -= ilen; + dst += olen; + dlen -= olen; + cc = 3; + } + + set_address(env, r1, dst); + set_length(env, r1 + 1, dlen); + set_address(env, r2, src); + set_length(env, r2 + 1, slen); + + return cc; +} + +uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) +{ + return convert_unicode(env, r1, r2, m3, GETPC(), + decode_utf8, encode_utf16); +} + +uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) +{ + return convert_unicode(env, r1, r2, m3, GETPC(), + decode_utf8, encode_utf32); +} + +uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) +{ + return convert_unicode(env, r1, r2, m3, GETPC(), + decode_utf16, encode_utf8); +} + +uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) +{ + return convert_unicode(env, r1, r2, m3, GETPC(), + decode_utf16, encode_utf32); +} + +uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) +{ + return convert_unicode(env, r1, r2, m3, GETPC(), + decode_utf32, encode_utf8); +} + +uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3) +{ + return convert_unicode(env, r1, r2, m3, GETPC(), + decode_utf32, encode_utf16); +} + +void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len, + uintptr_t ra) +{ + /* test the actual access, not just any access to the page due to LAP */ + while (len) { + const uint64_t pagelen = -(addr | TARGET_PAGE_MASK); + const uint64_t curlen = MIN(pagelen, len); + + probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra); + addr = wrap_address(env, addr + curlen); + len -= curlen; + } +} + +void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len) +{ + probe_write_access(env, addr, len, GETPC()); +} diff --git a/target/s390x/tcg/meson.build b/target/s390x/tcg/meson.build new file mode 100644 index 0000000000..ee4e8fec77 --- /dev/null +++ b/target/s390x/tcg/meson.build @@ -0,0 +1,14 @@ +s390x_ss.add(when: 'CONFIG_TCG', if_true: files( + 'cc_helper.c', + 
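+  # insn-data.def, insn-format.def and translate_vx.c.inc are pulled in
+  # by translate.c via #include, so they are not compiled separately.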
'crypto_helper.c', + 'excp_helper.c', + 'fpu_helper.c', + 'int_helper.c', + 'mem_helper.c', + 'misc_helper.c', + 'translate.c', + 'vec_fpu_helper.c', + 'vec_helper.c', + 'vec_int_helper.c', + 'vec_string_helper.c', +)) diff --git a/target/s390x/tcg/misc_helper.c b/target/s390x/tcg/misc_helper.c new file mode 100644 index 0000000000..33e6999e15 --- /dev/null +++ b/target/s390x/tcg/misc_helper.c @@ -0,0 +1,785 @@ +/* + * S/390 misc helper routines + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2009 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "qemu/main-loop.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "exec/memory.h" +#include "qemu/host-utils.h" +#include "exec/helper-proto.h" +#include "qemu/timer.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "qapi/error.h" +#include "tcg_s390x.h" +#include "s390-tod.h" + +#if !defined(CONFIG_USER_ONLY) +#include "sysemu/cpus.h" +#include "sysemu/sysemu.h" +#include "hw/s390x/ebcdic.h" +#include "hw/s390x/s390-virtio-hcall.h" +#include "hw/s390x/sclp.h" +#include "hw/s390x/s390_flic.h" +#include "hw/s390x/ioinst.h" +#include "hw/s390x/s390-pci-inst.h" +#include "hw/boards.h" +#include "hw/s390x/tod.h" +#endif + +/* #define DEBUG_HELPER */ +#ifdef DEBUG_HELPER +#define HELPER_LOG(x...) qemu_log(x) +#else +#define HELPER_LOG(x...) +#endif + +/* Raise an exception statically from a TB. */ +void HELPER(exception)(CPUS390XState *env, uint32_t excp) +{ + CPUState *cs = env_cpu(env); + + HELPER_LOG("%s: exception %d\n", __func__, excp); + cs->exception_index = excp; + cpu_loop_exit(cs); +} + +/* Store CPU Timer (also used for EXTRACT CPU TIME) */ +uint64_t HELPER(stpt)(CPUS390XState *env) +{ +#if defined(CONFIG_USER_ONLY) + /* + * Fake a descending CPU timer. We could get negative values here, + * but we don't care as it is up to the OS when to process that + * interrupt and reset to > 0. 
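+     * Subtracting the ever-increasing host tick counter from UINT64_MAX
+     * gives exactly that: a value that only counts down.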
+ */ + return UINT64_MAX - (uint64_t)cpu_get_host_ticks(); +#else + return time2tod(env->cputm - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); +#endif +} + +/* Store Clock */ +uint64_t HELPER(stck)(CPUS390XState *env) +{ +#ifdef CONFIG_USER_ONLY + struct timespec ts; + uint64_t ns; + + clock_gettime(CLOCK_REALTIME, &ts); + ns = ts.tv_sec * NANOSECONDS_PER_SECOND + ts.tv_nsec; + + return TOD_UNIX_EPOCH + time2tod(ns); +#else + S390TODState *td = s390_get_todstate(); + S390TODClass *tdc = S390_TOD_GET_CLASS(td); + S390TOD tod; + + tdc->get(td, &tod, &error_abort); + return tod.low; +#endif +} + +#ifndef CONFIG_USER_ONLY +/* SCLP service call */ +uint32_t HELPER(servc)(CPUS390XState *env, uint64_t r1, uint64_t r2) +{ + qemu_mutex_lock_iothread(); + int r = sclp_service_call(env, r1, r2); + qemu_mutex_unlock_iothread(); + if (r < 0) { + tcg_s390_program_interrupt(env, -r, GETPC()); + } + return r; +} + +void HELPER(diag)(CPUS390XState *env, uint32_t r1, uint32_t r3, uint32_t num) +{ + uint64_t r; + + switch (num) { + case 0x500: + /* KVM hypercall */ + qemu_mutex_lock_iothread(); + r = s390_virtio_hypercall(env); + qemu_mutex_unlock_iothread(); + break; + case 0x44: + /* yield */ + r = 0; + break; + case 0x308: + /* ipl */ + qemu_mutex_lock_iothread(); + handle_diag_308(env, r1, r3, GETPC()); + qemu_mutex_unlock_iothread(); + r = 0; + break; + case 0x288: + /* time bomb (watchdog) */ + r = handle_diag_288(env, r1, r3); + break; + default: + r = -1; + break; + } + + if (r) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC()); + } +} + +/* Set Prefix */ +void HELPER(spx)(CPUS390XState *env, uint64_t a1) +{ + CPUState *cs = env_cpu(env); + uint32_t prefix = a1 & 0x7fffe000; + + env->psa = prefix; + HELPER_LOG("prefix: %#x\n", prefix); + tlb_flush_page(cs, 0); + tlb_flush_page(cs, TARGET_PAGE_SIZE); +} + +static void update_ckc_timer(CPUS390XState *env) +{ + S390TODState *td = s390_get_todstate(); + uint64_t time; + + /* stop the timer and remove pending CKC IRQs */ + timer_del(env->tod_timer); + g_assert(qemu_mutex_iothread_locked()); + env->pending_int &= ~INTERRUPT_EXT_CLOCK_COMPARATOR; + + /* the tod has to exceed the ckc, this can never happen if ckc is all 1's */ + if (env->ckc == -1ULL) { + return; + } + + /* difference between origins */ + time = env->ckc - td->base.low; + + /* nanoseconds */ + time = tod2time(time); + + timer_mod(env->tod_timer, time); +} + +/* Set Clock Comparator */ +void HELPER(sckc)(CPUS390XState *env, uint64_t ckc) +{ + env->ckc = ckc; + + qemu_mutex_lock_iothread(); + update_ckc_timer(env); + qemu_mutex_unlock_iothread(); +} + +void tcg_s390_tod_updated(CPUState *cs, run_on_cpu_data opaque) +{ + S390CPU *cpu = S390_CPU(cs); + + update_ckc_timer(&cpu->env); +} + +/* Set Clock */ +uint32_t HELPER(sck)(CPUS390XState *env, uint64_t tod_low) +{ + S390TODState *td = s390_get_todstate(); + S390TODClass *tdc = S390_TOD_GET_CLASS(td); + S390TOD tod = { + .high = 0, + .low = tod_low, + }; + + qemu_mutex_lock_iothread(); + tdc->set(td, &tod, &error_abort); + qemu_mutex_unlock_iothread(); + return 0; +} + +/* Set Tod Programmable Field */ +void HELPER(sckpf)(CPUS390XState *env, uint64_t r0) +{ + uint32_t val = r0; + + if (val & 0xffff0000) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC()); + } + env->todpr = val; +} + +/* Store Clock Comparator */ +uint64_t HELPER(stckc)(CPUS390XState *env) +{ + return env->ckc; +} + +/* Set CPU Timer */ +void HELPER(spt)(CPUS390XState *env, uint64_t time) +{ + if (time == -1ULL) { + return; + } + + /* nanoseconds */ + time = 
tod2time(time); + + env->cputm = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + time; + + timer_mod(env->cpu_timer, env->cputm); +} + +/* Store System Information */ +uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0, uint64_t r0, uint64_t r1) +{ + const uintptr_t ra = GETPC(); + const uint32_t sel1 = r0 & STSI_R0_SEL1_MASK; + const uint32_t sel2 = r1 & STSI_R1_SEL2_MASK; + const MachineState *ms = MACHINE(qdev_get_machine()); + uint16_t total_cpus = 0, conf_cpus = 0, reserved_cpus = 0; + S390CPU *cpu = env_archcpu(env); + SysIB sysib = { }; + int i, cc = 0; + + if ((r0 & STSI_R0_FC_MASK) > STSI_R0_FC_LEVEL_3) { + /* invalid function code: no other checks are performed */ + return 3; + } + + if ((r0 & STSI_R0_RESERVED_MASK) || (r1 & STSI_R1_RESERVED_MASK)) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + if ((r0 & STSI_R0_FC_MASK) == STSI_R0_FC_CURRENT) { + /* query the current level: no further checks are performed */ + env->regs[0] = STSI_R0_FC_LEVEL_3; + return 0; + } + + if (a0 & ~TARGET_PAGE_MASK) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + /* count the cpus and split them into configured and reserved ones */ + for (i = 0; i < ms->possible_cpus->len; i++) { + total_cpus++; + if (ms->possible_cpus->cpus[i].cpu) { + conf_cpus++; + } else { + reserved_cpus++; + } + } + + /* + * In theory, we could report Level 1 / Level 2 as current. However, + * the Linux kernel will detect this as running under LPAR and assume + * that we have a sclp linemode console (which is always present on + * LPAR, but not the default for QEMU), therefore not displaying boot + * messages and making booting a Linux kernel under TCG harder. + * + * For now we fake the same SMP configuration on all levels. + * + * TODO: We could later make the level configurable via the machine + * and change defaults (linemode console) based on machine type + * and accelerator. 
+ */ + switch (r0 & STSI_R0_FC_MASK) { + case STSI_R0_FC_LEVEL_1: + if ((sel1 == 1) && (sel2 == 1)) { + /* Basic Machine Configuration */ + char type[5] = {}; + + ebcdic_put(sysib.sysib_111.manuf, "QEMU ", 16); + /* same as machine type number in STORE CPU ID, but in EBCDIC */ + snprintf(type, ARRAY_SIZE(type), "%X", cpu->model->def->type); + ebcdic_put(sysib.sysib_111.type, type, 4); + /* model number (not stored in STORE CPU ID for z/Architecure) */ + ebcdic_put(sysib.sysib_111.model, "QEMU ", 16); + ebcdic_put(sysib.sysib_111.sequence, "QEMU ", 16); + ebcdic_put(sysib.sysib_111.plant, "QEMU", 4); + } else if ((sel1 == 2) && (sel2 == 1)) { + /* Basic Machine CPU */ + ebcdic_put(sysib.sysib_121.sequence, "QEMUQEMUQEMUQEMU", 16); + ebcdic_put(sysib.sysib_121.plant, "QEMU", 4); + sysib.sysib_121.cpu_addr = cpu_to_be16(env->core_id); + } else if ((sel1 == 2) && (sel2 == 2)) { + /* Basic Machine CPUs */ + sysib.sysib_122.capability = cpu_to_be32(0x443afc29); + sysib.sysib_122.total_cpus = cpu_to_be16(total_cpus); + sysib.sysib_122.conf_cpus = cpu_to_be16(conf_cpus); + sysib.sysib_122.reserved_cpus = cpu_to_be16(reserved_cpus); + } else { + cc = 3; + } + break; + case STSI_R0_FC_LEVEL_2: + if ((sel1 == 2) && (sel2 == 1)) { + /* LPAR CPU */ + ebcdic_put(sysib.sysib_221.sequence, "QEMUQEMUQEMUQEMU", 16); + ebcdic_put(sysib.sysib_221.plant, "QEMU", 4); + sysib.sysib_221.cpu_addr = cpu_to_be16(env->core_id); + } else if ((sel1 == 2) && (sel2 == 2)) { + /* LPAR CPUs */ + sysib.sysib_222.lcpuc = 0x80; /* dedicated */ + sysib.sysib_222.total_cpus = cpu_to_be16(total_cpus); + sysib.sysib_222.conf_cpus = cpu_to_be16(conf_cpus); + sysib.sysib_222.reserved_cpus = cpu_to_be16(reserved_cpus); + ebcdic_put(sysib.sysib_222.name, "QEMU ", 8); + sysib.sysib_222.caf = cpu_to_be32(1000); + sysib.sysib_222.dedicated_cpus = cpu_to_be16(conf_cpus); + } else { + cc = 3; + } + break; + case STSI_R0_FC_LEVEL_3: + if ((sel1 == 2) && (sel2 == 2)) { + /* VM CPUs */ + sysib.sysib_322.count = 1; + sysib.sysib_322.vm[0].total_cpus = cpu_to_be16(total_cpus); + sysib.sysib_322.vm[0].conf_cpus = cpu_to_be16(conf_cpus); + sysib.sysib_322.vm[0].reserved_cpus = cpu_to_be16(reserved_cpus); + sysib.sysib_322.vm[0].caf = cpu_to_be32(1000); + /* Linux kernel uses this to distinguish us from z/VM */ + ebcdic_put(sysib.sysib_322.vm[0].cpi, "KVM/Linux ", 16); + sysib.sysib_322.vm[0].ext_name_encoding = 2; /* UTF-8 */ + + /* If our VM has a name, use the real name */ + if (qemu_name) { + memset(sysib.sysib_322.vm[0].name, 0x40, + sizeof(sysib.sysib_322.vm[0].name)); + ebcdic_put(sysib.sysib_322.vm[0].name, qemu_name, + MIN(sizeof(sysib.sysib_322.vm[0].name), + strlen(qemu_name))); + strpadcpy((char *)sysib.sysib_322.ext_names[0], + sizeof(sysib.sysib_322.ext_names[0]), + qemu_name, '\0'); + + } else { + ebcdic_put(sysib.sysib_322.vm[0].name, "TCGguest", 8); + strcpy((char *)sysib.sysib_322.ext_names[0], "TCGguest"); + } + + /* add the uuid */ + memcpy(sysib.sysib_322.vm[0].uuid, &qemu_uuid, + sizeof(sysib.sysib_322.vm[0].uuid)); + } else { + cc = 3; + } + break; + } + + if (cc == 0) { + if (s390_cpu_virt_mem_write(cpu, a0, 0, &sysib, sizeof(sysib))) { + s390_cpu_virt_mem_handle_exc(cpu, ra); + } + } + + return cc; +} + +uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1, + uint32_t r3) +{ + int cc; + + /* TODO: needed to inject interrupts - push further down */ + qemu_mutex_lock_iothread(); + cc = handle_sigp(env, order_code & SIGP_ORDER_MASK, r1, r3); + qemu_mutex_unlock_iothread(); + + return cc; +} +#endif + 
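+/*
+ * All of the channel-I/O helpers below share one shape: take the big
+ * QEMU ("iothread") lock around the ioinst_handle_*() call, since
+ * subchannel emulation touches shared machine state.  A hedged sketch
+ * of that pattern (this wrapper is illustrative and not used here):
+ */
+#if 0
+static void do_ioinst_locked(S390CPU *cpu,
+                             void (*fn)(S390CPU *, uint64_t, uintptr_t),
+                             uint64_t r1, uintptr_t ra)
+{
+    qemu_mutex_lock_iothread();     /* serialize against board state */
+    fn(cpu, r1, ra);
+    qemu_mutex_unlock_iothread();
+}
+#endif
+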
+#ifndef CONFIG_USER_ONLY +void HELPER(xsch)(CPUS390XState *env, uint64_t r1) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_xsch(cpu, r1, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(csch)(CPUS390XState *env, uint64_t r1) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_csch(cpu, r1, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(hsch)(CPUS390XState *env, uint64_t r1) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_hsch(cpu, r1, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(msch)(CPUS390XState *env, uint64_t r1, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_msch(cpu, r1, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(rchp)(CPUS390XState *env, uint64_t r1) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_rchp(cpu, r1, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(rsch)(CPUS390XState *env, uint64_t r1) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_rsch(cpu, r1, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(sal)(CPUS390XState *env, uint64_t r1) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + ioinst_handle_sal(cpu, r1, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(schm)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + ioinst_handle_schm(cpu, r1, r2, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(ssch)(CPUS390XState *env, uint64_t r1, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_ssch(cpu, r1, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(stcrw)(CPUS390XState *env, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + ioinst_handle_stcrw(cpu, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(stsch)(CPUS390XState *env, uint64_t r1, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_stsch(cpu, r1, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} + +uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr) +{ + const uintptr_t ra = GETPC(); + S390CPU *cpu = env_archcpu(env); + QEMUS390FLICState *flic = s390_get_qemu_flic(s390_get_flic()); + QEMUS390FlicIO *io = NULL; + LowCore *lowcore; + + if (addr & 0x3) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + qemu_mutex_lock_iothread(); + io = qemu_s390_flic_dequeue_io(flic, env->cregs[6]); + if (!io) { + qemu_mutex_unlock_iothread(); + return 0; + } + + if (addr) { + struct { + uint16_t id; + uint16_t nr; + uint32_t parm; + } intc = { + .id = cpu_to_be16(io->id), + .nr = cpu_to_be16(io->nr), + .parm = cpu_to_be32(io->parm), + }; + + if (s390_cpu_virt_mem_write(cpu, addr, 0, &intc, sizeof(intc))) { + /* writing failed, reinject and properly clean up */ + s390_io_interrupt(io->id, io->nr, io->parm, io->word); + qemu_mutex_unlock_iothread(); + g_free(io); + s390_cpu_virt_mem_handle_exc(cpu, ra); + return 0; + } + } else { + /* no protection applies */ + lowcore = cpu_map_lowcore(env); + lowcore->subchannel_id = cpu_to_be16(io->id); + lowcore->subchannel_nr = cpu_to_be16(io->nr); + lowcore->io_int_parm = cpu_to_be32(io->parm); + lowcore->io_int_word = cpu_to_be32(io->word); + 
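+        /* unmapping commits the interruption code to guest memory */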
cpu_unmap_lowcore(lowcore); + } + + g_free(io); + qemu_mutex_unlock_iothread(); + return 1; +} + +void HELPER(tsch)(CPUS390XState *env, uint64_t r1, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_tsch(cpu, r1, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(chsc)(CPUS390XState *env, uint64_t inst) +{ + S390CPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + ioinst_handle_chsc(cpu, inst >> 16, GETPC()); + qemu_mutex_unlock_iothread(); +} +#endif + +#ifndef CONFIG_USER_ONLY +void HELPER(per_check_exception)(CPUS390XState *env) +{ + if (env->per_perc_atmid) { + tcg_s390_program_interrupt(env, PGM_PER, GETPC()); + } +} + +/* Check if an address is within the PER starting address and the PER + ending address. The address range might loop. */ +static inline bool get_per_in_range(CPUS390XState *env, uint64_t addr) +{ + if (env->cregs[10] <= env->cregs[11]) { + return env->cregs[10] <= addr && addr <= env->cregs[11]; + } else { + return env->cregs[10] <= addr || addr <= env->cregs[11]; + } +} + +void HELPER(per_branch)(CPUS390XState *env, uint64_t from, uint64_t to) +{ + if ((env->cregs[9] & PER_CR9_EVENT_BRANCH)) { + if (!(env->cregs[9] & PER_CR9_CONTROL_BRANCH_ADDRESS) + || get_per_in_range(env, to)) { + env->per_address = from; + env->per_perc_atmid = PER_CODE_EVENT_BRANCH | get_per_atmid(env); + } + } +} + +void HELPER(per_ifetch)(CPUS390XState *env, uint64_t addr) +{ + if ((env->cregs[9] & PER_CR9_EVENT_IFETCH) && get_per_in_range(env, addr)) { + env->per_address = addr; + env->per_perc_atmid = PER_CODE_EVENT_IFETCH | get_per_atmid(env); + + /* If the instruction has to be nullified, trigger the + exception immediately. */ + if (env->cregs[9] & PER_CR9_EVENT_NULLIFICATION) { + CPUState *cs = env_cpu(env); + + env->per_perc_atmid |= PER_CODE_EVENT_NULLIFICATION; + env->int_pgm_code = PGM_PER; + env->int_pgm_ilen = get_ilen(cpu_ldub_code(env, addr)); + + cs->exception_index = EXCP_PGM; + cpu_loop_exit(cs); + } + } +} + +void HELPER(per_store_real)(CPUS390XState *env) +{ + if ((env->cregs[9] & PER_CR9_EVENT_STORE) && + (env->cregs[9] & PER_CR9_EVENT_STORE_REAL)) { + /* PSW is saved just before calling the helper. */ + env->per_address = env->psw.addr; + env->per_perc_atmid = PER_CODE_EVENT_STORE_REAL | get_per_atmid(env); + } +} +#endif + +static uint8_t stfl_bytes[2048]; +static unsigned int used_stfl_bytes; + +static void prepare_stfl(void) +{ + static bool initialized; + int i; + + /* racy, but we don't care, the same values are always written */ + if (initialized) { + return; + } + + s390_get_feat_block(S390_FEAT_TYPE_STFL, stfl_bytes); + for (i = 0; i < sizeof(stfl_bytes); i++) { + if (stfl_bytes[i]) { + used_stfl_bytes = i + 1; + } + } + initialized = true; +} + +#ifndef CONFIG_USER_ONLY +void HELPER(stfl)(CPUS390XState *env) +{ + LowCore *lowcore; + + lowcore = cpu_map_lowcore(env); + prepare_stfl(); + memcpy(&lowcore->stfl_fac_list, stfl_bytes, sizeof(lowcore->stfl_fac_list)); + cpu_unmap_lowcore(lowcore); +} +#endif + +uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr) +{ + const uintptr_t ra = GETPC(); + const int count_bytes = ((env->regs[0] & 0xff) + 1) * 8; + int max_bytes; + int i; + + if (addr & 0x7) { + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } + + prepare_stfl(); + max_bytes = ROUND_UP(used_stfl_bytes, 8); + + /* + * The PoP says that doublewords beyond the highest-numbered facility + * bit may or may not be stored. 
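+ * (Both behaviours are permitted; the bytes lie within the program's
+ * own operand, so either choice is architecturally sound.)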
However, existing hardware appears to + * not store the words, and existing software depend on that. + */ + for (i = 0; i < MIN(count_bytes, max_bytes); ++i) { + cpu_stb_data_ra(env, addr + i, stfl_bytes[i], ra); + } + + env->regs[0] = deposit64(env->regs[0], 0, 8, (max_bytes / 8) - 1); + return count_bytes >= max_bytes ? 0 : 3; +} + +#ifndef CONFIG_USER_ONLY +/* + * Note: we ignore any return code of the functions called for the pci + * instructions, as the only time they return !0 is when the stub is + * called, and in that case we didn't even offer the zpci facility. + * The only exception is SIC, where program checks need to be handled + * by the caller. + */ +void HELPER(clp)(CPUS390XState *env, uint32_t r2) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + clp_service_call(cpu, r2, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(pcilg)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + pcilg_service_call(cpu, r1, r2, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(pcistg)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + pcistg_service_call(cpu, r1, r2, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(stpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba, + uint32_t ar) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + stpcifc_service_call(cpu, r1, fiba, ar, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(sic)(CPUS390XState *env, uint64_t r1, uint64_t r3) +{ + int r; + + qemu_mutex_lock_iothread(); + r = css_do_sic(env, (r3 >> 27) & 0x7, r1 & 0xffff); + qemu_mutex_unlock_iothread(); + /* css_do_sic() may actually return a PGM_xxx value to inject */ + if (r) { + tcg_s390_program_interrupt(env, -r, GETPC()); + } +} + +void HELPER(rpcit)(CPUS390XState *env, uint32_t r1, uint32_t r2) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + rpcit_service_call(cpu, r1, r2, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(pcistb)(CPUS390XState *env, uint32_t r1, uint32_t r3, + uint64_t gaddr, uint32_t ar) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + pcistb_service_call(cpu, r1, r3, gaddr, ar, GETPC()); + qemu_mutex_unlock_iothread(); +} + +void HELPER(mpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba, + uint32_t ar) +{ + S390CPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + mpcifc_service_call(cpu, r1, fiba, ar, GETPC()); + qemu_mutex_unlock_iothread(); +} +#endif diff --git a/target/s390x/tcg/s390-tod.h b/target/s390x/tcg/s390-tod.h new file mode 100644 index 0000000000..8b74d6a6d8 --- /dev/null +++ b/target/s390x/tcg/s390-tod.h @@ -0,0 +1,29 @@ +/* + * TOD (Time Of Day) clock + * + * Copyright 2018 Red Hat, Inc. + * Author(s): David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef TARGET_S390_TOD_H +#define TARGET_S390_TOD_H + +/* The value of the TOD clock for 1.1.1970. 
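+   The TOD epoch is 1900-01-01 and bit 51 ticks once per microsecond, so
+   with the 17 leap days between 1900 and 1970 the constant re-derives as
+
+       (2208988800ULL * 1000000) << 12  ==  0x7d91048bca000000
+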
*/ +#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL + +/* Converts ns to s390's clock format */ +static inline uint64_t time2tod(uint64_t ns) +{ + return (ns << 9) / 125 + (((ns & 0xff80000000000000ull) / 125) << 9); +} + +/* Converts s390's clock format to ns */ +static inline uint64_t tod2time(uint64_t t) +{ + return ((t >> 9) * 125) + (((t & 0x1ff) * 125) >> 9); +} + +#endif diff --git a/target/s390x/tcg/tcg_s390x.h b/target/s390x/tcg/tcg_s390x.h new file mode 100644 index 0000000000..2f54ccb027 --- /dev/null +++ b/target/s390x/tcg/tcg_s390x.h @@ -0,0 +1,24 @@ +/* + * QEMU TCG support -- s390x specific functions. + * + * Copyright 2018 Red Hat, Inc. + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef TCG_S390X_H +#define TCG_S390X_H + +void tcg_s390_tod_updated(CPUState *cs, run_on_cpu_data opaque); +void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, + uint32_t code, uintptr_t ra); +void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc, + uintptr_t ra); +void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc, + uintptr_t ra); + +#endif /* TCG_S390X_H */ diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c new file mode 100644 index 0000000000..92fa7656c2 --- /dev/null +++ b/target/s390x/tcg/translate.c @@ -0,0 +1,6672 @@ +/* + * S/390 translation + * + * Copyright (c) 2009 Ulrich Hecht + * Copyright (c) 2010 Alexander Graf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* #define DEBUG_INLINE_BRANCHES */ +#define S390X_DEBUG_DISAS +/* #define S390X_DEBUG_DISAS_VERBOSE */ + +#ifdef S390X_DEBUG_DISAS_VERBOSE +# define LOG_DISAS(...) qemu_log(__VA_ARGS__) +#else +# define LOG_DISAS(...) do { } while (0) +#endif + +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "disas/disas.h" +#include "exec/exec-all.h" +#include "tcg/tcg-op.h" +#include "tcg/tcg-op-gvec.h" +#include "qemu/log.h" +#include "qemu/host-utils.h" +#include "exec/cpu_ldst.h" +#include "exec/gen-icount.h" +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" + +#include "exec/translator.h" +#include "exec/log.h" +#include "qemu/atomic128.h" + + +/* Information that (most) every instruction needs to manipulate. */ +typedef struct DisasContext DisasContext; +typedef struct DisasInsn DisasInsn; +typedef struct DisasFields DisasFields; + +/* + * Define a structure to hold the decoded fields. We'll store each inside + * an array indexed by an enum. In order to conserve memory, we'll arrange + * for fields that do not exist at the same time to overlap, thus the "C" + * for compact. For checking purposes there is an "O" for original index + * as well that will be applied to availability bitmaps. 
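+ * For example, r1, m1, b1, i1 and v1 all share compact slot 0 below,
+ * since no instruction format uses more than one of them at a time.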
+ */ + +enum DisasFieldIndexO { + FLD_O_r1, + FLD_O_r2, + FLD_O_r3, + FLD_O_m1, + FLD_O_m3, + FLD_O_m4, + FLD_O_m5, + FLD_O_m6, + FLD_O_b1, + FLD_O_b2, + FLD_O_b4, + FLD_O_d1, + FLD_O_d2, + FLD_O_d4, + FLD_O_x2, + FLD_O_l1, + FLD_O_l2, + FLD_O_i1, + FLD_O_i2, + FLD_O_i3, + FLD_O_i4, + FLD_O_i5, + FLD_O_v1, + FLD_O_v2, + FLD_O_v3, + FLD_O_v4, +}; + +enum DisasFieldIndexC { + FLD_C_r1 = 0, + FLD_C_m1 = 0, + FLD_C_b1 = 0, + FLD_C_i1 = 0, + FLD_C_v1 = 0, + + FLD_C_r2 = 1, + FLD_C_b2 = 1, + FLD_C_i2 = 1, + + FLD_C_r3 = 2, + FLD_C_m3 = 2, + FLD_C_i3 = 2, + FLD_C_v3 = 2, + + FLD_C_m4 = 3, + FLD_C_b4 = 3, + FLD_C_i4 = 3, + FLD_C_l1 = 3, + FLD_C_v4 = 3, + + FLD_C_i5 = 4, + FLD_C_d1 = 4, + FLD_C_m5 = 4, + + FLD_C_d2 = 5, + FLD_C_m6 = 5, + + FLD_C_d4 = 6, + FLD_C_x2 = 6, + FLD_C_l2 = 6, + FLD_C_v2 = 6, + + NUM_C_FIELD = 7 +}; + +struct DisasFields { + uint64_t raw_insn; + unsigned op:8; + unsigned op2:8; + unsigned presentC:16; + unsigned int presentO; + int c[NUM_C_FIELD]; +}; + +struct DisasContext { + DisasContextBase base; + const DisasInsn *insn; + DisasFields fields; + uint64_t ex_value; + /* + * During translate_one(), pc_tmp is used to determine the instruction + * to be executed after base.pc_next - e.g. next sequential instruction + * or a branch target. + */ + uint64_t pc_tmp; + uint32_t ilen; + enum cc_op cc_op; + bool do_debug; +}; + +/* Information carried about a condition to be evaluated. */ +typedef struct { + TCGCond cond:8; + bool is_64; + bool g1; + bool g2; + union { + struct { TCGv_i64 a, b; } s64; + struct { TCGv_i32 a, b; } s32; + } u; +} DisasCompare; + +#ifdef DEBUG_INLINE_BRANCHES +static uint64_t inline_branch_hit[CC_OP_MAX]; +static uint64_t inline_branch_miss[CC_OP_MAX]; +#endif + +static void pc_to_link_info(TCGv_i64 out, DisasContext *s, uint64_t pc) +{ + TCGv_i64 tmp; + + if (s->base.tb->flags & FLAG_MASK_32) { + if (s->base.tb->flags & FLAG_MASK_64) { + tcg_gen_movi_i64(out, pc); + return; + } + pc |= 0x80000000; + } + assert(!(s->base.tb->flags & FLAG_MASK_64)); + tmp = tcg_const_i64(pc); + tcg_gen_deposit_i64(out, out, tmp, 0, 32); + tcg_temp_free_i64(tmp); +} + +static TCGv_i64 psw_addr; +static TCGv_i64 psw_mask; +static TCGv_i64 gbea; + +static TCGv_i32 cc_op; +static TCGv_i64 cc_src; +static TCGv_i64 cc_dst; +static TCGv_i64 cc_vr; + +static char cpu_reg_names[16][4]; +static TCGv_i64 regs[16]; + +void s390x_translate_init(void) +{ + int i; + + psw_addr = tcg_global_mem_new_i64(cpu_env, + offsetof(CPUS390XState, psw.addr), + "psw_addr"); + psw_mask = tcg_global_mem_new_i64(cpu_env, + offsetof(CPUS390XState, psw.mask), + "psw_mask"); + gbea = tcg_global_mem_new_i64(cpu_env, + offsetof(CPUS390XState, gbea), + "gbea"); + + cc_op = tcg_global_mem_new_i32(cpu_env, offsetof(CPUS390XState, cc_op), + "cc_op"); + cc_src = tcg_global_mem_new_i64(cpu_env, offsetof(CPUS390XState, cc_src), + "cc_src"); + cc_dst = tcg_global_mem_new_i64(cpu_env, offsetof(CPUS390XState, cc_dst), + "cc_dst"); + cc_vr = tcg_global_mem_new_i64(cpu_env, offsetof(CPUS390XState, cc_vr), + "cc_vr"); + + for (i = 0; i < 16; i++) { + snprintf(cpu_reg_names[i], sizeof(cpu_reg_names[0]), "r%d", i); + regs[i] = tcg_global_mem_new(cpu_env, + offsetof(CPUS390XState, regs[i]), + cpu_reg_names[i]); + } +} + +static inline int vec_full_reg_offset(uint8_t reg) +{ + g_assert(reg < 32); + return offsetof(CPUS390XState, vregs[reg][0]); +} + +static inline int vec_reg_offset(uint8_t reg, uint8_t enr, MemOp es) +{ + /* Convert element size (es) - e.g. 
MO_8 - to bytes */
+    const uint8_t bytes = 1 << es;
+    int offs = enr * bytes;
+
+    /*
+     * vregs[n][0] is the lowest 8 bytes and vregs[n][1] the highest 8 bytes
+     * of the 16 byte vector, on both little and big endian systems.
+     *
+     * Big Endian (target/possible host)
+     * B:  [ 0][ 1][ 2][ 3][ 4][ 5][ 6][ 7] - [ 8][ 9][10][11][12][13][14][15]
+     * HW: [     0][     1][     2][     3] - [     4][     5][     6][     7]
+     * W:  [             0][             1] - [             2][             3]
+     * DW: [                             0] - [                             1]
+     *
+     * Little Endian (possible host)
+     * B:  [ 7][ 6][ 5][ 4][ 3][ 2][ 1][ 0] - [15][14][13][12][11][10][ 9][ 8]
+     * HW: [     3][     2][     1][     0] - [     7][     6][     5][     4]
+     * W:  [             1][             0] - [             3][             2]
+     * DW: [                             0] - [                             1]
+     *
+     * For 16 byte elements, the two 8 byte halves will not form a host
+     * int128 if the host is little endian, since they're in the wrong order.
+     * Some operations (e.g. xor) do not care. For operations like addition,
+     * the two 8 byte elements have to be loaded separately. Let's force all
+     * 16 byte operations to handle it in a special way.
+     */
+    g_assert(es <= MO_64);
+#ifndef HOST_WORDS_BIGENDIAN
+    offs ^= (8 - bytes);
+#endif
+    return offs + vec_full_reg_offset(reg);
+}
+
+static inline int freg64_offset(uint8_t reg)
+{
+    g_assert(reg < 16);
+    return vec_reg_offset(reg, 0, MO_64);
+}
+
+static inline int freg32_offset(uint8_t reg)
+{
+    g_assert(reg < 16);
+    return vec_reg_offset(reg, 0, MO_32);
+}
+
+static TCGv_i64 load_reg(int reg)
+{
+    TCGv_i64 r = tcg_temp_new_i64();
+    tcg_gen_mov_i64(r, regs[reg]);
+    return r;
+}
+
+static TCGv_i64 load_freg(int reg)
+{
+    TCGv_i64 r = tcg_temp_new_i64();
+
+    tcg_gen_ld_i64(r, cpu_env, freg64_offset(reg));
+    return r;
+}
+
+static TCGv_i64 load_freg32_i64(int reg)
+{
+    TCGv_i64 r = tcg_temp_new_i64();
+
+    tcg_gen_ld32u_i64(r, cpu_env, freg32_offset(reg));
+    return r;
+}
+
+static void store_reg(int reg, TCGv_i64 v)
+{
+    tcg_gen_mov_i64(regs[reg], v);
+}
+
+static void store_freg(int reg, TCGv_i64 v)
+{
+    tcg_gen_st_i64(v, cpu_env, freg64_offset(reg));
+}
+
+static void store_reg32_i64(int reg, TCGv_i64 v)
+{
+    /* 32 bit register writes keep the upper half */
+    tcg_gen_deposit_i64(regs[reg], regs[reg], v, 0, 32);
+}
+
+static void store_reg32h_i64(int reg, TCGv_i64 v)
+{
+    tcg_gen_deposit_i64(regs[reg], regs[reg], v, 32, 32);
+}
+
+static void store_freg32_i64(int reg, TCGv_i64 v)
+{
+    tcg_gen_st32_i64(v, cpu_env, freg32_offset(reg));
+}
+
+static void return_low128(TCGv_i64 dest)
+{
+    tcg_gen_ld_i64(dest, cpu_env, offsetof(CPUS390XState, retxl));
+}
+
+static void update_psw_addr(DisasContext *s)
+{
+    /* psw.addr */
+    tcg_gen_movi_i64(psw_addr, s->base.pc_next);
+}
+
+static void per_branch(DisasContext *s, bool to_next)
+{
+#ifndef CONFIG_USER_ONLY
+    tcg_gen_movi_i64(gbea, s->base.pc_next);
+
+    if (s->base.tb->flags & FLAG_MASK_PER) {
+        TCGv_i64 next_pc = to_next ?
tcg_const_i64(s->pc_tmp) : psw_addr;
+        gen_helper_per_branch(cpu_env, gbea, next_pc);
+        if (to_next) {
+            tcg_temp_free_i64(next_pc);
+        }
+    }
+#endif
+}
+
+static void per_branch_cond(DisasContext *s, TCGCond cond,
+                            TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#ifndef CONFIG_USER_ONLY
+    if (s->base.tb->flags & FLAG_MASK_PER) {
+        TCGLabel *lab = gen_new_label();
+        tcg_gen_brcond_i64(tcg_invert_cond(cond), arg1, arg2, lab);
+
+        tcg_gen_movi_i64(gbea, s->base.pc_next);
+        gen_helper_per_branch(cpu_env, gbea, psw_addr);
+
+        gen_set_label(lab);
+    } else {
+        TCGv_i64 pc = tcg_const_i64(s->base.pc_next);
+        tcg_gen_movcond_i64(cond, gbea, arg1, arg2, gbea, pc);
+        tcg_temp_free_i64(pc);
+    }
+#endif
+}
+
+static void per_breaking_event(DisasContext *s)
+{
+    tcg_gen_movi_i64(gbea, s->base.pc_next);
+}
+
+static void update_cc_op(DisasContext *s)
+{
+    if (s->cc_op != CC_OP_DYNAMIC && s->cc_op != CC_OP_STATIC) {
+        tcg_gen_movi_i32(cc_op, s->cc_op);
+    }
+}
+
+static inline uint64_t ld_code2(CPUS390XState *env, uint64_t pc)
+{
+    return (uint64_t)cpu_lduw_code(env, pc);
+}
+
+static inline uint64_t ld_code4(CPUS390XState *env, uint64_t pc)
+{
+    return (uint64_t)(uint32_t)cpu_ldl_code(env, pc);
+}
+
+static int get_mem_index(DisasContext *s)
+{
+#ifdef CONFIG_USER_ONLY
+    return MMU_USER_IDX;
+#else
+    if (!(s->base.tb->flags & FLAG_MASK_DAT)) {
+        return MMU_REAL_IDX;
+    }
+
+    switch (s->base.tb->flags & FLAG_MASK_ASC) {
+    case PSW_ASC_PRIMARY >> FLAG_MASK_PSW_SHIFT:
+        return MMU_PRIMARY_IDX;
+    case PSW_ASC_SECONDARY >> FLAG_MASK_PSW_SHIFT:
+        return MMU_SECONDARY_IDX;
+    case PSW_ASC_HOME >> FLAG_MASK_PSW_SHIFT:
+        return MMU_HOME_IDX;
+    default:
+        tcg_abort();
+        break;
+    }
+#endif
+}
+
+static void gen_exception(int excp)
+{
+    TCGv_i32 tmp = tcg_const_i32(excp);
+    gen_helper_exception(cpu_env, tmp);
+    tcg_temp_free_i32(tmp);
+}
+
+static void gen_program_exception(DisasContext *s, int code)
+{
+    TCGv_i32 tmp;
+
+    /* Remember what pgm exception this was. */
+    tmp = tcg_const_i32(code);
+    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_code));
+    tcg_temp_free_i32(tmp);
+
+    tmp = tcg_const_i32(s->ilen);
+    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUS390XState, int_pgm_ilen));
+    tcg_temp_free_i32(tmp);
+
+    /* update the psw */
+    update_psw_addr(s);
+
+    /* Save off cc. */
+    update_cc_op(s);
+
+    /* Trigger exception. */
+    gen_exception(EXCP_PGM);
+}
+
+static inline void gen_illegal_opcode(DisasContext *s)
+{
+    gen_program_exception(s, PGM_OPERATION);
+}
+
+static inline void gen_data_exception(uint8_t dxc)
+{
+    TCGv_i32 tmp = tcg_const_i32(dxc);
+    gen_helper_data_exception(cpu_env, tmp);
+    tcg_temp_free_i32(tmp);
+}
+
+static inline void gen_trap(DisasContext *s)
+{
+    /* Set DXC to 0xff */
+    gen_data_exception(0xff);
+}
+
+static void gen_addi_and_wrap_i64(DisasContext *s, TCGv_i64 dst, TCGv_i64 src,
+                                  int64_t imm)
+{
+    tcg_gen_addi_i64(dst, src, imm);
+    if (!(s->base.tb->flags & FLAG_MASK_64)) {
+        if (s->base.tb->flags & FLAG_MASK_32) {
+            tcg_gen_andi_i64(dst, dst, 0x7fffffff);
+        } else {
+            tcg_gen_andi_i64(dst, dst, 0x00ffffff);
+        }
+    }
+}
+
+static TCGv_i64 get_address(DisasContext *s, int x2, int b2, int d2)
+{
+    TCGv_i64 tmp = tcg_temp_new_i64();
+
+    /*
+     * Note that d2 is limited to 20 bits, signed. If we crop negative
+     * displacements early we create larger immediate addends.
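+     * For example, with 24-bit addressing, base + (-4) is computed as a
+     * signed addition and wrapped once at the end, rather than adding the
+     * masked 24-bit constant 0xfffffc up front.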
+ */ + if (b2 && x2) { + tcg_gen_add_i64(tmp, regs[b2], regs[x2]); + gen_addi_and_wrap_i64(s, tmp, tmp, d2); + } else if (b2) { + gen_addi_and_wrap_i64(s, tmp, regs[b2], d2); + } else if (x2) { + gen_addi_and_wrap_i64(s, tmp, regs[x2], d2); + } else if (!(s->base.tb->flags & FLAG_MASK_64)) { + if (s->base.tb->flags & FLAG_MASK_32) { + tcg_gen_movi_i64(tmp, d2 & 0x7fffffff); + } else { + tcg_gen_movi_i64(tmp, d2 & 0x00ffffff); + } + } else { + tcg_gen_movi_i64(tmp, d2); + } + + return tmp; +} + +static inline bool live_cc_data(DisasContext *s) +{ + return (s->cc_op != CC_OP_DYNAMIC + && s->cc_op != CC_OP_STATIC + && s->cc_op > 3); +} + +static inline void gen_op_movi_cc(DisasContext *s, uint32_t val) +{ + if (live_cc_data(s)) { + tcg_gen_discard_i64(cc_src); + tcg_gen_discard_i64(cc_dst); + tcg_gen_discard_i64(cc_vr); + } + s->cc_op = CC_OP_CONST0 + val; +} + +static void gen_op_update1_cc_i64(DisasContext *s, enum cc_op op, TCGv_i64 dst) +{ + if (live_cc_data(s)) { + tcg_gen_discard_i64(cc_src); + tcg_gen_discard_i64(cc_vr); + } + tcg_gen_mov_i64(cc_dst, dst); + s->cc_op = op; +} + +static void gen_op_update2_cc_i64(DisasContext *s, enum cc_op op, TCGv_i64 src, + TCGv_i64 dst) +{ + if (live_cc_data(s)) { + tcg_gen_discard_i64(cc_vr); + } + tcg_gen_mov_i64(cc_src, src); + tcg_gen_mov_i64(cc_dst, dst); + s->cc_op = op; +} + +static void gen_op_update3_cc_i64(DisasContext *s, enum cc_op op, TCGv_i64 src, + TCGv_i64 dst, TCGv_i64 vr) +{ + tcg_gen_mov_i64(cc_src, src); + tcg_gen_mov_i64(cc_dst, dst); + tcg_gen_mov_i64(cc_vr, vr); + s->cc_op = op; +} + +static void set_cc_nz_u64(DisasContext *s, TCGv_i64 val) +{ + gen_op_update1_cc_i64(s, CC_OP_NZ, val); +} + +/* CC value is in env->cc_op */ +static void set_cc_static(DisasContext *s) +{ + if (live_cc_data(s)) { + tcg_gen_discard_i64(cc_src); + tcg_gen_discard_i64(cc_dst); + tcg_gen_discard_i64(cc_vr); + } + s->cc_op = CC_OP_STATIC; +} + +/* calculates cc into cc_op */ +static void gen_op_calc_cc(DisasContext *s) +{ + TCGv_i32 local_cc_op = NULL; + TCGv_i64 dummy = NULL; + + switch (s->cc_op) { + default: + dummy = tcg_const_i64(0); + /* FALLTHRU */ + case CC_OP_ADD_64: + case CC_OP_SUB_64: + case CC_OP_ADD_32: + case CC_OP_SUB_32: + local_cc_op = tcg_const_i32(s->cc_op); + break; + case CC_OP_CONST0: + case CC_OP_CONST1: + case CC_OP_CONST2: + case CC_OP_CONST3: + case CC_OP_STATIC: + case CC_OP_DYNAMIC: + break; + } + + switch (s->cc_op) { + case CC_OP_CONST0: + case CC_OP_CONST1: + case CC_OP_CONST2: + case CC_OP_CONST3: + /* s->cc_op is the cc value */ + tcg_gen_movi_i32(cc_op, s->cc_op - CC_OP_CONST0); + break; + case CC_OP_STATIC: + /* env->cc_op already is the cc value */ + break; + case CC_OP_NZ: + case CC_OP_ABS_64: + case CC_OP_NABS_64: + case CC_OP_ABS_32: + case CC_OP_NABS_32: + case CC_OP_LTGT0_32: + case CC_OP_LTGT0_64: + case CC_OP_COMP_32: + case CC_OP_COMP_64: + case CC_OP_NZ_F32: + case CC_OP_NZ_F64: + case CC_OP_FLOGR: + case CC_OP_LCBB: + case CC_OP_MULS_32: + /* 1 argument */ + gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, dummy, cc_dst, dummy); + break; + case CC_OP_ADDU: + case CC_OP_ICM: + case CC_OP_LTGT_32: + case CC_OP_LTGT_64: + case CC_OP_LTUGTU_32: + case CC_OP_LTUGTU_64: + case CC_OP_TM_32: + case CC_OP_TM_64: + case CC_OP_SLA_32: + case CC_OP_SLA_64: + case CC_OP_SUBU: + case CC_OP_NZ_F128: + case CC_OP_VC: + case CC_OP_MULS_64: + /* 2 arguments */ + gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, dummy); + break; + case CC_OP_ADD_64: + case CC_OP_SUB_64: + case CC_OP_ADD_32: + case CC_OP_SUB_32: 
+ /* 3 arguments */ + gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, cc_vr); + break; + case CC_OP_DYNAMIC: + /* unknown operation - assume 3 arguments and cc_op in env */ + gen_helper_calc_cc(cc_op, cpu_env, cc_op, cc_src, cc_dst, cc_vr); + break; + default: + tcg_abort(); + } + + if (local_cc_op) { + tcg_temp_free_i32(local_cc_op); + } + if (dummy) { + tcg_temp_free_i64(dummy); + } + + /* We now have cc in cc_op as constant */ + set_cc_static(s); +} + +static bool use_goto_tb(DisasContext *s, uint64_t dest) +{ + if (unlikely(s->base.tb->flags & FLAG_MASK_PER)) { + return false; + } + return translator_use_goto_tb(&s->base, dest); +} + +static void account_noninline_branch(DisasContext *s, int cc_op) +{ +#ifdef DEBUG_INLINE_BRANCHES + inline_branch_miss[cc_op]++; +#endif +} + +static void account_inline_branch(DisasContext *s, int cc_op) +{ +#ifdef DEBUG_INLINE_BRANCHES + inline_branch_hit[cc_op]++; +#endif +} + +/* Table of mask values to comparison codes, given a comparison as input. + For such, CC=3 should not be possible. */ +static const TCGCond ltgt_cond[16] = { + TCG_COND_NEVER, TCG_COND_NEVER, /* | | | x */ + TCG_COND_GT, TCG_COND_GT, /* | | GT | x */ + TCG_COND_LT, TCG_COND_LT, /* | LT | | x */ + TCG_COND_NE, TCG_COND_NE, /* | LT | GT | x */ + TCG_COND_EQ, TCG_COND_EQ, /* EQ | | | x */ + TCG_COND_GE, TCG_COND_GE, /* EQ | | GT | x */ + TCG_COND_LE, TCG_COND_LE, /* EQ | LT | | x */ + TCG_COND_ALWAYS, TCG_COND_ALWAYS, /* EQ | LT | GT | x */ +}; + +/* Table of mask values to comparison codes, given a logic op as input. + For such, only CC=0 and CC=1 should be possible. */ +static const TCGCond nz_cond[16] = { + TCG_COND_NEVER, TCG_COND_NEVER, /* | | x | x */ + TCG_COND_NEVER, TCG_COND_NEVER, + TCG_COND_NE, TCG_COND_NE, /* | NE | x | x */ + TCG_COND_NE, TCG_COND_NE, + TCG_COND_EQ, TCG_COND_EQ, /* EQ | | x | x */ + TCG_COND_EQ, TCG_COND_EQ, + TCG_COND_ALWAYS, TCG_COND_ALWAYS, /* EQ | NE | x | x */ + TCG_COND_ALWAYS, TCG_COND_ALWAYS, +}; + +/* Interpret MASK in terms of S->CC_OP, and fill in C with all the + details required to generate a TCG comparison. */ +static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask) +{ + TCGCond cond; + enum cc_op old_cc_op = s->cc_op; + + if (mask == 15 || mask == 0) { + c->cond = (mask ? TCG_COND_ALWAYS : TCG_COND_NEVER); + c->u.s32.a = cc_op; + c->u.s32.b = cc_op; + c->g1 = c->g2 = true; + c->is_64 = false; + return; + } + + /* Find the TCG condition for the mask + cc op. 
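+       Mask bit 8 selects CC 0, bit 4 selects CC 1, bit 2 selects CC 2 and
+       bit 1 selects CC 3; the branch is taken when (8 >> cc) & mask is
+       nonzero.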
*/ + switch (old_cc_op) { + case CC_OP_LTGT0_32: + case CC_OP_LTGT0_64: + case CC_OP_LTGT_32: + case CC_OP_LTGT_64: + cond = ltgt_cond[mask]; + if (cond == TCG_COND_NEVER) { + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + case CC_OP_LTUGTU_32: + case CC_OP_LTUGTU_64: + cond = tcg_unsigned_cond(ltgt_cond[mask]); + if (cond == TCG_COND_NEVER) { + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + case CC_OP_NZ: + cond = nz_cond[mask]; + if (cond == TCG_COND_NEVER) { + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + case CC_OP_TM_32: + case CC_OP_TM_64: + switch (mask) { + case 8: + cond = TCG_COND_EQ; + break; + case 4 | 2 | 1: + cond = TCG_COND_NE; + break; + default: + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + case CC_OP_ICM: + switch (mask) { + case 8: + cond = TCG_COND_EQ; + break; + case 4 | 2 | 1: + case 4 | 2: + cond = TCG_COND_NE; + break; + default: + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + case CC_OP_FLOGR: + switch (mask & 0xa) { + case 8: /* src == 0 -> no one bit found */ + cond = TCG_COND_EQ; + break; + case 2: /* src != 0 -> one bit found */ + cond = TCG_COND_NE; + break; + default: + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + case CC_OP_ADDU: + case CC_OP_SUBU: + switch (mask) { + case 8 | 2: /* result == 0 */ + cond = TCG_COND_EQ; + break; + case 4 | 1: /* result != 0 */ + cond = TCG_COND_NE; + break; + case 8 | 4: /* !carry (borrow) */ + cond = old_cc_op == CC_OP_ADDU ? TCG_COND_EQ : TCG_COND_NE; + break; + case 2 | 1: /* carry (!borrow) */ + cond = old_cc_op == CC_OP_ADDU ? TCG_COND_NE : TCG_COND_EQ; + break; + default: + goto do_dynamic; + } + account_inline_branch(s, old_cc_op); + break; + + default: + do_dynamic: + /* Calculate cc value. */ + gen_op_calc_cc(s); + /* FALLTHRU */ + + case CC_OP_STATIC: + /* Jump based on CC. We'll load up the real cond below; + the assignment here merely avoids a compiler warning. */ + account_noninline_branch(s, old_cc_op); + old_cc_op = CC_OP_STATIC; + cond = TCG_COND_NEVER; + break; + } + + /* Load up the arguments of the comparison. 
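+       The g1/g2 flags mark TCG globals (cc_op, cc_src, cc_dst) so that
+       free_compare() will not free them.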
*/ + c->is_64 = true; + c->g1 = c->g2 = false; + switch (old_cc_op) { + case CC_OP_LTGT0_32: + c->is_64 = false; + c->u.s32.a = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(c->u.s32.a, cc_dst); + c->u.s32.b = tcg_const_i32(0); + break; + case CC_OP_LTGT_32: + case CC_OP_LTUGTU_32: + c->is_64 = false; + c->u.s32.a = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(c->u.s32.a, cc_src); + c->u.s32.b = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(c->u.s32.b, cc_dst); + break; + + case CC_OP_LTGT0_64: + case CC_OP_NZ: + case CC_OP_FLOGR: + c->u.s64.a = cc_dst; + c->u.s64.b = tcg_const_i64(0); + c->g1 = true; + break; + case CC_OP_LTGT_64: + case CC_OP_LTUGTU_64: + c->u.s64.a = cc_src; + c->u.s64.b = cc_dst; + c->g1 = c->g2 = true; + break; + + case CC_OP_TM_32: + case CC_OP_TM_64: + case CC_OP_ICM: + c->u.s64.a = tcg_temp_new_i64(); + c->u.s64.b = tcg_const_i64(0); + tcg_gen_and_i64(c->u.s64.a, cc_src, cc_dst); + break; + + case CC_OP_ADDU: + case CC_OP_SUBU: + c->is_64 = true; + c->u.s64.b = tcg_const_i64(0); + c->g1 = true; + switch (mask) { + case 8 | 2: + case 4 | 1: /* result */ + c->u.s64.a = cc_dst; + break; + case 8 | 4: + case 2 | 1: /* carry */ + c->u.s64.a = cc_src; + break; + default: + g_assert_not_reached(); + } + break; + + case CC_OP_STATIC: + c->is_64 = false; + c->u.s32.a = cc_op; + c->g1 = true; + switch (mask) { + case 0x8 | 0x4 | 0x2: /* cc != 3 */ + cond = TCG_COND_NE; + c->u.s32.b = tcg_const_i32(3); + break; + case 0x8 | 0x4 | 0x1: /* cc != 2 */ + cond = TCG_COND_NE; + c->u.s32.b = tcg_const_i32(2); + break; + case 0x8 | 0x2 | 0x1: /* cc != 1 */ + cond = TCG_COND_NE; + c->u.s32.b = tcg_const_i32(1); + break; + case 0x8 | 0x2: /* cc == 0 || cc == 2 => (cc & 1) == 0 */ + cond = TCG_COND_EQ; + c->g1 = false; + c->u.s32.a = tcg_temp_new_i32(); + c->u.s32.b = tcg_const_i32(0); + tcg_gen_andi_i32(c->u.s32.a, cc_op, 1); + break; + case 0x8 | 0x4: /* cc < 2 */ + cond = TCG_COND_LTU; + c->u.s32.b = tcg_const_i32(2); + break; + case 0x8: /* cc == 0 */ + cond = TCG_COND_EQ; + c->u.s32.b = tcg_const_i32(0); + break; + case 0x4 | 0x2 | 0x1: /* cc != 0 */ + cond = TCG_COND_NE; + c->u.s32.b = tcg_const_i32(0); + break; + case 0x4 | 0x1: /* cc == 1 || cc == 3 => (cc & 1) != 0 */ + cond = TCG_COND_NE; + c->g1 = false; + c->u.s32.a = tcg_temp_new_i32(); + c->u.s32.b = tcg_const_i32(0); + tcg_gen_andi_i32(c->u.s32.a, cc_op, 1); + break; + case 0x4: /* cc == 1 */ + cond = TCG_COND_EQ; + c->u.s32.b = tcg_const_i32(1); + break; + case 0x2 | 0x1: /* cc > 1 */ + cond = TCG_COND_GTU; + c->u.s32.b = tcg_const_i32(1); + break; + case 0x2: /* cc == 2 */ + cond = TCG_COND_EQ; + c->u.s32.b = tcg_const_i32(2); + break; + case 0x1: /* cc == 3 */ + cond = TCG_COND_EQ; + c->u.s32.b = tcg_const_i32(3); + break; + default: + /* CC is masked by something else: (8 >> cc) & mask. */ + cond = TCG_COND_NE; + c->g1 = false; + c->u.s32.a = tcg_const_i32(8); + c->u.s32.b = tcg_const_i32(0); + tcg_gen_shr_i32(c->u.s32.a, c->u.s32.a, cc_op); + tcg_gen_andi_i32(c->u.s32.a, c->u.s32.a, mask); + break; + } + break; + + default: + abort(); + } + c->cond = cond; +} + +static void free_compare(DisasCompare *c) +{ + if (!c->g1) { + if (c->is_64) { + tcg_temp_free_i64(c->u.s64.a); + } else { + tcg_temp_free_i32(c->u.s32.a); + } + } + if (!c->g2) { + if (c->is_64) { + tcg_temp_free_i64(c->u.s64.b); + } else { + tcg_temp_free_i32(c->u.s32.b); + } + } +} + +/* ====================================================================== */ +/* Define the insn format enumeration. 
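+   Each F<n>(name, ...) line in insn-format.def expands to one FMT_<name>
+   enumerator here.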
 */
+#define F0(N) FMT_##N,
+#define F1(N, X1) F0(N)
+#define F2(N, X1, X2) F0(N)
+#define F3(N, X1, X2, X3) F0(N)
+#define F4(N, X1, X2, X3, X4) F0(N)
+#define F5(N, X1, X2, X3, X4, X5) F0(N)
+#define F6(N, X1, X2, X3, X4, X5, X6) F0(N)
+
+typedef enum {
+#include "insn-format.def"
+} DisasFormat;
+
+#undef F0
+#undef F1
+#undef F2
+#undef F3
+#undef F4
+#undef F5
+#undef F6
+
+/* This is the way fields are to be accessed out of DisasFields. */
+#define have_field(S, F)  have_field1((S), FLD_O_##F)
+#define get_field(S, F)   get_field1((S), FLD_O_##F, FLD_C_##F)
+
+static bool have_field1(const DisasContext *s, enum DisasFieldIndexO c)
+{
+    return (s->fields.presentO >> c) & 1;
+}
+
+static int get_field1(const DisasContext *s, enum DisasFieldIndexO o,
+                      enum DisasFieldIndexC c)
+{
+    assert(have_field1(s, o));
+    return s->fields.c[c];
+}
+
+/* Describe the layout of each field in each format. */
+typedef struct DisasField {
+    unsigned int beg:8;
+    unsigned int size:8;
+    unsigned int type:2;
+    unsigned int indexC:6;
+    enum DisasFieldIndexO indexO:8;
+} DisasField;
+
+typedef struct DisasFormatInfo {
+    DisasField op[NUM_C_FIELD];
+} DisasFormatInfo;
+
+#define R(N, B) { B, 4, 0, FLD_C_r##N, FLD_O_r##N }
+#define M(N, B) { B, 4, 0, FLD_C_m##N, FLD_O_m##N }
+#define V(N, B) { B, 4, 3, FLD_C_v##N, FLD_O_v##N }
+#define BD(N, BB, BD) { BB, 4, 0, FLD_C_b##N, FLD_O_b##N }, \
+                      { BD, 12, 0, FLD_C_d##N, FLD_O_d##N }
+#define BXD(N) { 16, 4, 0, FLD_C_b##N, FLD_O_b##N }, \
+               { 12, 4, 0, FLD_C_x##N, FLD_O_x##N }, \
+               { 20, 12, 0, FLD_C_d##N, FLD_O_d##N }
+#define BDL(N) { 16, 4, 0, FLD_C_b##N, FLD_O_b##N }, \
+               { 20, 20, 2, FLD_C_d##N, FLD_O_d##N }
+#define BXDL(N) { 16, 4, 0, FLD_C_b##N, FLD_O_b##N }, \
+                { 12, 4, 0, FLD_C_x##N, FLD_O_x##N }, \
+                { 20, 20, 2, FLD_C_d##N, FLD_O_d##N }
+#define I(N, B, S) { B, S, 1, FLD_C_i##N, FLD_O_i##N }
+#define L(N, B, S) { B, S, 0, FLD_C_l##N, FLD_O_l##N }
+
+#define F0(N) { { } },
+#define F1(N, X1) { { X1 } },
+#define F2(N, X1, X2) { { X1, X2 } },
+#define F3(N, X1, X2, X3) { { X1, X2, X3 } },
+#define F4(N, X1, X2, X3, X4) { { X1, X2, X3, X4 } },
+#define F5(N, X1, X2, X3, X4, X5) { { X1, X2, X3, X4, X5 } },
+#define F6(N, X1, X2, X3, X4, X5, X6) { { X1, X2, X3, X4, X5, X6 } },
+
+static const DisasFormatInfo format_info[] = {
+#include "insn-format.def"
+};
+
+#undef F0
+#undef F1
+#undef F2
+#undef F3
+#undef F4
+#undef F5
+#undef F6
+#undef R
+#undef M
+#undef V
+#undef BD
+#undef BXD
+#undef BDL
+#undef BXDL
+#undef I
+#undef L
+
+/* Generally, we'll extract operands into these structures, operate upon
+   them, and store them back. See the "in1", "in2", "prep", "wout" sets
+   of routines below for more details. */
+typedef struct {
+    bool g_out, g_out2, g_in1, g_in2;
+    TCGv_i64 out, out2, in1, in2;
+    TCGv_i64 addr1;
+} DisasOps;
+
+/* Instructions can place constraints on their operands, raising specification
+   exceptions if they are violated. To make this easy to automate, each "in1",
+   "in2", "prep", "wout" helper will have a SPEC_<name> define that equals one
+   of the following, or 0. To make this easy to document, we'll put the
+   SPEC_<name> defines next to <name>. */
+
+#define SPEC_r1_even 1
+#define SPEC_r2_even 2
+#define SPEC_r3_even 4
+#define SPEC_r1_f128 8
+#define SPEC_r2_f128 16
+
+/* Return values from translate_one, indicating the state of the TB. */
+
+/* We are not using a goto_tb (for whatever reason), but have updated
+   the PC (for whatever reason), so there's no need to do it again on
+   exiting the TB.
*/ +#define DISAS_PC_UPDATED DISAS_TARGET_0 + +/* We have emitted one or more goto_tb. No fixup required. */ +#define DISAS_GOTO_TB DISAS_TARGET_1 + +/* We have updated the PC and CC values. */ +#define DISAS_PC_CC_UPDATED DISAS_TARGET_2 + +/* We are exiting the TB, but have neither emitted a goto_tb, nor + updated the PC for the next instruction to be executed. */ +#define DISAS_PC_STALE DISAS_TARGET_3 + +/* We are exiting the TB to the main loop. */ +#define DISAS_PC_STALE_NOCHAIN DISAS_TARGET_4 + + +/* Instruction flags */ +#define IF_AFP1 0x0001 /* r1 is a fp reg for HFP/FPS instructions */ +#define IF_AFP2 0x0002 /* r2 is a fp reg for HFP/FPS instructions */ +#define IF_AFP3 0x0004 /* r3 is a fp reg for HFP/FPS instructions */ +#define IF_BFP 0x0008 /* binary floating point instruction */ +#define IF_DFP 0x0010 /* decimal floating point instruction */ +#define IF_PRIV 0x0020 /* privileged instruction */ +#define IF_VEC 0x0040 /* vector instruction */ +#define IF_IO 0x0080 /* input/output instruction */ + +struct DisasInsn { + unsigned opc:16; + unsigned flags:16; + DisasFormat fmt:8; + unsigned fac:8; + unsigned spec:8; + + const char *name; + + /* Pre-process arguments before HELP_OP. */ + void (*help_in1)(DisasContext *, DisasOps *); + void (*help_in2)(DisasContext *, DisasOps *); + void (*help_prep)(DisasContext *, DisasOps *); + + /* + * Post-process output after HELP_OP. + * Note that these are not called if HELP_OP returns DISAS_NORETURN. + */ + void (*help_wout)(DisasContext *, DisasOps *); + void (*help_cout)(DisasContext *, DisasOps *); + + /* Implement the operation itself. */ + DisasJumpType (*help_op)(DisasContext *, DisasOps *); + + uint64_t data; +}; + +/* ====================================================================== */ +/* Miscellaneous helpers, used by several operations. */ + +static void help_l2_shift(DisasContext *s, DisasOps *o, int mask) +{ + int b2 = get_field(s, b2); + int d2 = get_field(s, d2); + + if (b2 == 0) { + o->in2 = tcg_const_i64(d2 & mask); + } else { + o->in2 = get_address(s, 0, b2, d2); + tcg_gen_andi_i64(o->in2, o->in2, mask); + } +} + +static DisasJumpType help_goto_direct(DisasContext *s, uint64_t dest) +{ + if (dest == s->pc_tmp) { + per_branch(s, true); + return DISAS_NEXT; + } + if (use_goto_tb(s, dest)) { + update_cc_op(s); + per_breaking_event(s); + tcg_gen_goto_tb(0); + tcg_gen_movi_i64(psw_addr, dest); + tcg_gen_exit_tb(s->base.tb, 0); + return DISAS_GOTO_TB; + } else { + tcg_gen_movi_i64(psw_addr, dest); + per_branch(s, false); + return DISAS_PC_UPDATED; + } +} + +static DisasJumpType help_branch(DisasContext *s, DisasCompare *c, + bool is_imm, int imm, TCGv_i64 cdest) +{ + DisasJumpType ret; + uint64_t dest = s->base.pc_next + 2 * imm; + TCGLabel *lab; + + /* Take care of the special cases first. */ + if (c->cond == TCG_COND_NEVER) { + ret = DISAS_NEXT; + goto egress; + } + if (is_imm) { + if (dest == s->pc_tmp) { + /* Branch to next. */ + per_branch(s, true); + ret = DISAS_NEXT; + goto egress; + } + if (c->cond == TCG_COND_ALWAYS) { + ret = help_goto_direct(s, dest); + goto egress; + } + } else { + if (!cdest) { + /* E.g. bcr %r0 -> no branch. */ + ret = DISAS_NEXT; + goto egress; + } + if (c->cond == TCG_COND_ALWAYS) { + tcg_gen_mov_i64(psw_addr, cdest); + per_branch(s, false); + ret = DISAS_PC_UPDATED; + goto egress; + } + } + + if (use_goto_tb(s, s->pc_tmp)) { + if (is_imm && use_goto_tb(s, dest)) { + /* Both exits can use goto_tb. 
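+               Slot 0 chains the not-taken (fall-through) path and slot 1
+               the taken branch.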
*/ + update_cc_op(s); + + lab = gen_new_label(); + if (c->is_64) { + tcg_gen_brcond_i64(c->cond, c->u.s64.a, c->u.s64.b, lab); + } else { + tcg_gen_brcond_i32(c->cond, c->u.s32.a, c->u.s32.b, lab); + } + + /* Branch not taken. */ + tcg_gen_goto_tb(0); + tcg_gen_movi_i64(psw_addr, s->pc_tmp); + tcg_gen_exit_tb(s->base.tb, 0); + + /* Branch taken. */ + gen_set_label(lab); + per_breaking_event(s); + tcg_gen_goto_tb(1); + tcg_gen_movi_i64(psw_addr, dest); + tcg_gen_exit_tb(s->base.tb, 1); + + ret = DISAS_GOTO_TB; + } else { + /* Fallthru can use goto_tb, but taken branch cannot. */ + /* Store taken branch destination before the brcond. This + avoids having to allocate a new local temp to hold it. + We'll overwrite this in the not taken case anyway. */ + if (!is_imm) { + tcg_gen_mov_i64(psw_addr, cdest); + } + + lab = gen_new_label(); + if (c->is_64) { + tcg_gen_brcond_i64(c->cond, c->u.s64.a, c->u.s64.b, lab); + } else { + tcg_gen_brcond_i32(c->cond, c->u.s32.a, c->u.s32.b, lab); + } + + /* Branch not taken. */ + update_cc_op(s); + tcg_gen_goto_tb(0); + tcg_gen_movi_i64(psw_addr, s->pc_tmp); + tcg_gen_exit_tb(s->base.tb, 0); + + gen_set_label(lab); + if (is_imm) { + tcg_gen_movi_i64(psw_addr, dest); + } + per_breaking_event(s); + ret = DISAS_PC_UPDATED; + } + } else { + /* Fallthru cannot use goto_tb. This by itself is vanishingly rare. + Most commonly we're single-stepping or some other condition that + disables all use of goto_tb. Just update the PC and exit. */ + + TCGv_i64 next = tcg_const_i64(s->pc_tmp); + if (is_imm) { + cdest = tcg_const_i64(dest); + } + + if (c->is_64) { + tcg_gen_movcond_i64(c->cond, psw_addr, c->u.s64.a, c->u.s64.b, + cdest, next); + per_branch_cond(s, c->cond, c->u.s64.a, c->u.s64.b); + } else { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 z = tcg_const_i64(0); + tcg_gen_setcond_i32(c->cond, t0, c->u.s32.a, c->u.s32.b); + tcg_gen_extu_i32_i64(t1, t0); + tcg_temp_free_i32(t0); + tcg_gen_movcond_i64(TCG_COND_NE, psw_addr, t1, z, cdest, next); + per_branch_cond(s, TCG_COND_NE, t1, z); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(z); + } + + if (is_imm) { + tcg_temp_free_i64(cdest); + } + tcg_temp_free_i64(next); + + ret = DISAS_PC_UPDATED; + } + + egress: + free_compare(c); + return ret; +} + +/* ====================================================================== */ +/* The operations. These perform the bulk of the work for any insn, + usually after the operands have been loaded and output initialized. */ + +static DisasJumpType op_abs(DisasContext *s, DisasOps *o) +{ + tcg_gen_abs_i64(o->out, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_absf32(DisasContext *s, DisasOps *o) +{ + tcg_gen_andi_i64(o->out, o->in2, 0x7fffffffull); + return DISAS_NEXT; +} + +static DisasJumpType op_absf64(DisasContext *s, DisasOps *o) +{ + tcg_gen_andi_i64(o->out, o->in2, 0x7fffffffffffffffull); + return DISAS_NEXT; +} + +static DisasJumpType op_absf128(DisasContext *s, DisasOps *o) +{ + tcg_gen_andi_i64(o->out, o->in1, 0x7fffffffffffffffull); + tcg_gen_mov_i64(o->out2, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_add(DisasContext *s, DisasOps *o) +{ + tcg_gen_add_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_addu64(DisasContext *s, DisasOps *o) +{ + tcg_gen_movi_i64(cc_src, 0); + tcg_gen_add2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src); + return DISAS_NEXT; +} + +/* Compute carry into cc_src. 
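+   After a SUBU, cc_src is -1 if a borrow occurred and 0 if not, so the +1
+   below turns it into the usual 0/1 carry-in.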
*/ +static void compute_carry(DisasContext *s) +{ + switch (s->cc_op) { + case CC_OP_ADDU: + /* The carry value is already in cc_src (1,0). */ + break; + case CC_OP_SUBU: + tcg_gen_addi_i64(cc_src, cc_src, 1); + break; + default: + gen_op_calc_cc(s); + /* fall through */ + case CC_OP_STATIC: + /* The carry flag is the msb of CC; compute into cc_src. */ + tcg_gen_extu_i32_i64(cc_src, cc_op); + tcg_gen_shri_i64(cc_src, cc_src, 1); + break; + } +} + +static DisasJumpType op_addc32(DisasContext *s, DisasOps *o) +{ + compute_carry(s); + tcg_gen_add_i64(o->out, o->in1, o->in2); + tcg_gen_add_i64(o->out, o->out, cc_src); + return DISAS_NEXT; +} + +static DisasJumpType op_addc64(DisasContext *s, DisasOps *o) +{ + compute_carry(s); + + TCGv_i64 zero = tcg_const_i64(0); + tcg_gen_add2_i64(o->out, cc_src, o->in1, zero, cc_src, zero); + tcg_gen_add2_i64(o->out, cc_src, o->out, cc_src, o->in2, zero); + tcg_temp_free_i64(zero); + + return DISAS_NEXT; +} + +static DisasJumpType op_asi(DisasContext *s, DisasOps *o) +{ + bool non_atomic = !s390_has_feat(S390_FEAT_STFLE_45); + + o->in1 = tcg_temp_new_i64(); + if (non_atomic) { + tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data); + } else { + /* Perform the atomic addition in memory. */ + tcg_gen_atomic_fetch_add_i64(o->in1, o->addr1, o->in2, get_mem_index(s), + s->insn->data); + } + + /* Recompute also for atomic case: needed for setting CC. */ + tcg_gen_add_i64(o->out, o->in1, o->in2); + + if (non_atomic) { + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data); + } + return DISAS_NEXT; +} + +static DisasJumpType op_asiu64(DisasContext *s, DisasOps *o) +{ + bool non_atomic = !s390_has_feat(S390_FEAT_STFLE_45); + + o->in1 = tcg_temp_new_i64(); + if (non_atomic) { + tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data); + } else { + /* Perform the atomic addition in memory. */ + tcg_gen_atomic_fetch_add_i64(o->in1, o->addr1, o->in2, get_mem_index(s), + s->insn->data); + } + + /* Recompute also for atomic case: needed for setting CC. */ + tcg_gen_movi_i64(cc_src, 0); + tcg_gen_add2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src); + + if (non_atomic) { + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data); + } + return DISAS_NEXT; +} + +static DisasJumpType op_aeb(DisasContext *s, DisasOps *o) +{ + gen_helper_aeb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_adb(DisasContext *s, DisasOps *o) +{ + gen_helper_adb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_axb(DisasContext *s, DisasOps *o) +{ + gen_helper_axb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_and(DisasContext *s, DisasOps *o) +{ + tcg_gen_and_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_andi(DisasContext *s, DisasOps *o) +{ + int shift = s->insn->data & 0xff; + int size = s->insn->data >> 8; + uint64_t mask = ((1ull << size) - 1) << shift; + + assert(!o->g_in2); + tcg_gen_shli_i64(o->in2, o->in2, shift); + tcg_gen_ori_i64(o->in2, o->in2, ~mask); + tcg_gen_and_i64(o->out, o->in1, o->in2); + + /* Produce the CC from only the bits manipulated. 
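+       E.g. for a 16-bit immediate at shift 0, only the low 16 bits of the
+       result feed the CC.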
*/ + tcg_gen_andi_i64(cc_dst, o->out, mask); + set_cc_nz_u64(s, cc_dst); + return DISAS_NEXT; +} + +static DisasJumpType op_ni(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + + if (!s390_has_feat(S390_FEAT_INTERLOCKED_ACCESS_2)) { + tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data); + } else { + /* Perform the atomic operation in memory. */ + tcg_gen_atomic_fetch_and_i64(o->in1, o->addr1, o->in2, get_mem_index(s), + s->insn->data); + } + + /* Recompute also for atomic case: needed for setting CC. */ + tcg_gen_and_i64(o->out, o->in1, o->in2); + + if (!s390_has_feat(S390_FEAT_INTERLOCKED_ACCESS_2)) { + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data); + } + return DISAS_NEXT; +} + +static DisasJumpType op_bas(DisasContext *s, DisasOps *o) +{ + pc_to_link_info(o->out, s, s->pc_tmp); + if (o->in2) { + tcg_gen_mov_i64(psw_addr, o->in2); + per_branch(s, false); + return DISAS_PC_UPDATED; + } else { + return DISAS_NEXT; + } +} + +static void save_link_info(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t; + + if (s->base.tb->flags & (FLAG_MASK_32 | FLAG_MASK_64)) { + pc_to_link_info(o->out, s, s->pc_tmp); + return; + } + gen_op_calc_cc(s); + tcg_gen_andi_i64(o->out, o->out, 0xffffffff00000000ull); + tcg_gen_ori_i64(o->out, o->out, ((s->ilen / 2) << 30) | s->pc_tmp); + t = tcg_temp_new_i64(); + tcg_gen_shri_i64(t, psw_mask, 16); + tcg_gen_andi_i64(t, t, 0x0f000000); + tcg_gen_or_i64(o->out, o->out, t); + tcg_gen_extu_i32_i64(t, cc_op); + tcg_gen_shli_i64(t, t, 28); + tcg_gen_or_i64(o->out, o->out, t); + tcg_temp_free_i64(t); +} + +static DisasJumpType op_bal(DisasContext *s, DisasOps *o) +{ + save_link_info(s, o); + if (o->in2) { + tcg_gen_mov_i64(psw_addr, o->in2); + per_branch(s, false); + return DISAS_PC_UPDATED; + } else { + return DISAS_NEXT; + } +} + +static DisasJumpType op_basi(DisasContext *s, DisasOps *o) +{ + pc_to_link_info(o->out, s, s->pc_tmp); + return help_goto_direct(s, s->base.pc_next + 2 * get_field(s, i2)); +} + +static DisasJumpType op_bc(DisasContext *s, DisasOps *o) +{ + int m1 = get_field(s, m1); + bool is_imm = have_field(s, i2); + int imm = is_imm ? get_field(s, i2) : 0; + DisasCompare c; + + /* BCR with R2 = 0 causes no branching */ + if (have_field(s, r2) && get_field(s, r2) == 0) { + if (m1 == 14) { + /* Perform serialization */ + /* FIXME: check for fast-BCR-serialization facility */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); + } + if (m1 == 15) { + /* Perform serialization */ + /* FIXME: perform checkpoint-synchronisation */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); + } + return DISAS_NEXT; + } + + disas_jcc(s, &c, m1); + return help_branch(s, &c, is_imm, imm, o->in2); +} + +static DisasJumpType op_bct32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + bool is_imm = have_field(s, i2); + int imm = is_imm ? 
get_field(s, i2) : 0; + DisasCompare c; + TCGv_i64 t; + + c.cond = TCG_COND_NE; + c.is_64 = false; + c.g1 = false; + c.g2 = false; + + t = tcg_temp_new_i64(); + tcg_gen_subi_i64(t, regs[r1], 1); + store_reg32_i64(r1, t); + c.u.s32.a = tcg_temp_new_i32(); + c.u.s32.b = tcg_const_i32(0); + tcg_gen_extrl_i64_i32(c.u.s32.a, t); + tcg_temp_free_i64(t); + + return help_branch(s, &c, is_imm, imm, o->in2); +} + +static DisasJumpType op_bcth(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int imm = get_field(s, i2); + DisasCompare c; + TCGv_i64 t; + + c.cond = TCG_COND_NE; + c.is_64 = false; + c.g1 = false; + c.g2 = false; + + t = tcg_temp_new_i64(); + tcg_gen_shri_i64(t, regs[r1], 32); + tcg_gen_subi_i64(t, t, 1); + store_reg32h_i64(r1, t); + c.u.s32.a = tcg_temp_new_i32(); + c.u.s32.b = tcg_const_i32(0); + tcg_gen_extrl_i64_i32(c.u.s32.a, t); + tcg_temp_free_i64(t); + + return help_branch(s, &c, 1, imm, o->in2); +} + +static DisasJumpType op_bct64(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + bool is_imm = have_field(s, i2); + int imm = is_imm ? get_field(s, i2) : 0; + DisasCompare c; + + c.cond = TCG_COND_NE; + c.is_64 = true; + c.g1 = true; + c.g2 = false; + + tcg_gen_subi_i64(regs[r1], regs[r1], 1); + c.u.s64.a = regs[r1]; + c.u.s64.b = tcg_const_i64(0); + + return help_branch(s, &c, is_imm, imm, o->in2); +} + +static DisasJumpType op_bx32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + bool is_imm = have_field(s, i2); + int imm = is_imm ? get_field(s, i2) : 0; + DisasCompare c; + TCGv_i64 t; + + c.cond = (s->insn->data ? TCG_COND_LE : TCG_COND_GT); + c.is_64 = false; + c.g1 = false; + c.g2 = false; + + t = tcg_temp_new_i64(); + tcg_gen_add_i64(t, regs[r1], regs[r3]); + c.u.s32.a = tcg_temp_new_i32(); + c.u.s32.b = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(c.u.s32.a, t); + tcg_gen_extrl_i64_i32(c.u.s32.b, regs[r3 | 1]); + store_reg32_i64(r1, t); + tcg_temp_free_i64(t); + + return help_branch(s, &c, is_imm, imm, o->in2); +} + +static DisasJumpType op_bx64(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + bool is_imm = have_field(s, i2); + int imm = is_imm ? get_field(s, i2) : 0; + DisasCompare c; + + c.cond = (s->insn->data ? 
TCG_COND_LE : TCG_COND_GT); + c.is_64 = true; + + if (r1 == (r3 | 1)) { + c.u.s64.b = load_reg(r3 | 1); + c.g2 = false; + } else { + c.u.s64.b = regs[r3 | 1]; + c.g2 = true; + } + + tcg_gen_add_i64(regs[r1], regs[r1], regs[r3]); + c.u.s64.a = regs[r1]; + c.g1 = true; + + return help_branch(s, &c, is_imm, imm, o->in2); +} + +static DisasJumpType op_cj(DisasContext *s, DisasOps *o) +{ + int imm, m3 = get_field(s, m3); + bool is_imm; + DisasCompare c; + + c.cond = ltgt_cond[m3]; + if (s->insn->data) { + c.cond = tcg_unsigned_cond(c.cond); + } + c.is_64 = c.g1 = c.g2 = true; + c.u.s64.a = o->in1; + c.u.s64.b = o->in2; + + is_imm = have_field(s, i4); + if (is_imm) { + imm = get_field(s, i4); + } else { + imm = 0; + o->out = get_address(s, 0, get_field(s, b4), + get_field(s, d4)); + } + + return help_branch(s, &c, is_imm, imm, o->out); +} + +static DisasJumpType op_ceb(DisasContext *s, DisasOps *o) +{ + gen_helper_ceb(cc_op, cpu_env, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cdb(DisasContext *s, DisasOps *o) +{ + gen_helper_cdb(cc_op, cpu_env, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cxb(DisasContext *s, DisasOps *o) +{ + gen_helper_cxb(cc_op, cpu_env, o->out, o->out2, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static TCGv_i32 fpinst_extract_m34(DisasContext *s, bool m3_with_fpe, + bool m4_with_fpe) +{ + const bool fpe = s390_has_feat(S390_FEAT_FLOATING_POINT_EXT); + uint8_t m3 = get_field(s, m3); + uint8_t m4 = get_field(s, m4); + + /* m3 field was introduced with FPE */ + if (!fpe && m3_with_fpe) { + m3 = 0; + } + /* m4 field was introduced with FPE */ + if (!fpe && m4_with_fpe) { + m4 = 0; + } + + /* Check for valid rounding modes. Mode 3 was introduced later. 
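+       Per the check below, modes 0, 1 and 4-7 are always accepted, mode 3
+       only with the floating-point extension, and mode 2 is reserved.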
*/ + if (m3 == 2 || m3 > 7 || (!fpe && m3 == 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return NULL; + } + + return tcg_const_i32(deposit32(m3, 4, 4, m4)); +} + +static DisasJumpType op_cfeb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cfeb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cfdb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cfdb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cfxb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cfxb(o->out, cpu_env, o->in1, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cgeb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cgeb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cgdb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cgdb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cgxb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cgxb(o->out, cpu_env, o->in1, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clfeb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_clfeb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clfdb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_clfdb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clfxb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_clfxb(o->out, cpu_env, o->in1, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clgeb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_clgeb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clgdb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_clgdb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clgxb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_clgxb(o->out, cpu_env, o->in1, o->in2, m34); + 
tcg_temp_free_i32(m34); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_cegb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, true, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cegb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_cdgb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, true, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cdgb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_cxgb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, true, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cxgb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_celgb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_celgb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_cdlgb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cdlgb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_cxlgb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, false); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_cxlgb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_cksm(DisasContext *s, DisasOps *o) +{ + int r2 = get_field(s, r2); + TCGv_i64 len = tcg_temp_new_i64(); + + gen_helper_cksm(len, cpu_env, o->in1, o->in2, regs[r2 + 1]); + set_cc_static(s); + return_low128(o->out); + + tcg_gen_add_i64(regs[r2], regs[r2], len); + tcg_gen_sub_i64(regs[r2 + 1], regs[r2 + 1], len); + tcg_temp_free_i64(len); + + return DISAS_NEXT; +} + +static DisasJumpType op_clc(DisasContext *s, DisasOps *o) +{ + int l = get_field(s, l1); + TCGv_i32 vl; + + switch (l + 1) { + case 1: + tcg_gen_qemu_ld8u(cc_src, o->addr1, get_mem_index(s)); + tcg_gen_qemu_ld8u(cc_dst, o->in2, get_mem_index(s)); + break; + case 2: + tcg_gen_qemu_ld16u(cc_src, o->addr1, get_mem_index(s)); + tcg_gen_qemu_ld16u(cc_dst, o->in2, get_mem_index(s)); + break; + case 4: + tcg_gen_qemu_ld32u(cc_src, o->addr1, get_mem_index(s)); + tcg_gen_qemu_ld32u(cc_dst, o->in2, get_mem_index(s)); + break; + case 8: + tcg_gen_qemu_ld64(cc_src, o->addr1, get_mem_index(s)); + tcg_gen_qemu_ld64(cc_dst, o->in2, get_mem_index(s)); + break; + default: + vl = tcg_const_i32(l); + gen_helper_clc(cc_op, cpu_env, vl, o->addr1, o->in2); + tcg_temp_free_i32(vl); + set_cc_static(s); + return DISAS_NEXT; + } + gen_op_update2_cc_i64(s, CC_OP_LTUGTU_64, cc_src, cc_dst); + return DISAS_NEXT; +} + +static DisasJumpType op_clcl(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r2 = get_field(s, r2); + TCGv_i32 t1, t2; + + /* r1 and r2 must be even. 
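+       (each designates an even/odd register pair holding address and
+       length)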
*/ + if (r1 & 1 || r2 & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t1 = tcg_const_i32(r1); + t2 = tcg_const_i32(r2); + gen_helper_clcl(cc_op, cpu_env, t1, t2); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clcle(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i32 t1, t3; + + /* r1 and r3 must be even. */ + if (r1 & 1 || r3 & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t1 = tcg_const_i32(r1); + t3 = tcg_const_i32(r3); + gen_helper_clcle(cc_op, cpu_env, t1, o->in2, t3); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t3); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clclu(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i32 t1, t3; + + /* r1 and r3 must be even. */ + if (r1 & 1 || r3 & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t1 = tcg_const_i32(r1); + t3 = tcg_const_i32(r3); + gen_helper_clclu(cc_op, cpu_env, t1, o->in2, t3); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t3); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_clm(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m3 = tcg_const_i32(get_field(s, m3)); + TCGv_i32 t1 = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t1, o->in1); + gen_helper_clm(cc_op, cpu_env, t1, m3, o->in2); + set_cc_static(s); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(m3); + return DISAS_NEXT; +} + +static DisasJumpType op_clst(DisasContext *s, DisasOps *o) +{ + gen_helper_clst(o->in1, cpu_env, regs[0], o->in1, o->in2); + set_cc_static(s); + return_low128(o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_cps(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_andi_i64(t, o->in1, 0x8000000000000000ull); + tcg_gen_andi_i64(o->out, o->in2, 0x7fffffffffffffffull); + tcg_gen_or_i64(o->out, o->out, t); + tcg_temp_free_i64(t); + return DISAS_NEXT; +} + +static DisasJumpType op_cs(DisasContext *s, DisasOps *o) +{ + int d2 = get_field(s, d2); + int b2 = get_field(s, b2); + TCGv_i64 addr, cc; + + /* Note that in1 = R3 (new value) and + in2 = (zero-extended) R1 (expected value). */ + + addr = get_address(s, 0, b2, d2); + tcg_gen_atomic_cmpxchg_i64(o->out, addr, o->in2, o->in1, + get_mem_index(s), s->insn->data | MO_ALIGN); + tcg_temp_free_i64(addr); + + /* Are the memory and expected values (un)equal? Note that this setcond + produces the output CC value, thus the NE sense of the test. */ + cc = tcg_temp_new_i64(); + tcg_gen_setcond_i64(TCG_COND_NE, cc, o->in2, o->out); + tcg_gen_extrl_i64_i32(cc_op, cc); + tcg_temp_free_i64(cc); + set_cc_static(s); + + return DISAS_NEXT; +} + +static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + int d2 = get_field(s, d2); + int b2 = get_field(s, b2); + DisasJumpType ret = DISAS_NEXT; + TCGv_i64 addr; + TCGv_i32 t_r1, t_r3; + + /* Note that R1:R1+1 = expected value and R3:R3+1 = new value. 
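+       CDSG operates on a full 16 bytes, hence the even/odd pairs and the
+       cmpxchg128 requirement in the parallel case below.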
*/ + addr = get_address(s, 0, b2, d2); + t_r1 = tcg_const_i32(r1); + t_r3 = tcg_const_i32(r3); + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_cdsg(cpu_env, addr, t_r1, t_r3); + } else if (HAVE_CMPXCHG128) { + gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3); + } else { + gen_helper_exit_atomic(cpu_env); + ret = DISAS_NORETURN; + } + tcg_temp_free_i64(addr); + tcg_temp_free_i32(t_r1); + tcg_temp_free_i32(t_r3); + + set_cc_static(s); + return ret; +} + +static DisasJumpType op_csst(DisasContext *s, DisasOps *o) +{ + int r3 = get_field(s, r3); + TCGv_i32 t_r3 = tcg_const_i32(r3); + + if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_csst_parallel(cc_op, cpu_env, t_r3, o->addr1, o->in2); + } else { + gen_helper_csst(cc_op, cpu_env, t_r3, o->addr1, o->in2); + } + tcg_temp_free_i32(t_r3); + + set_cc_static(s); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_csp(DisasContext *s, DisasOps *o) +{ + MemOp mop = s->insn->data; + TCGv_i64 addr, old, cc; + TCGLabel *lab = gen_new_label(); + + /* Note that in1 = R1 (zero-extended expected value), + out = R1 (original reg), out2 = R1+1 (new value). */ + + addr = tcg_temp_new_i64(); + old = tcg_temp_new_i64(); + tcg_gen_andi_i64(addr, o->in2, -1ULL << (mop & MO_SIZE)); + tcg_gen_atomic_cmpxchg_i64(old, addr, o->in1, o->out2, + get_mem_index(s), mop | MO_ALIGN); + tcg_temp_free_i64(addr); + + /* Are the memory and expected values (un)equal? */ + cc = tcg_temp_new_i64(); + tcg_gen_setcond_i64(TCG_COND_NE, cc, o->in1, old); + tcg_gen_extrl_i64_i32(cc_op, cc); + + /* Write back the output now, so that it happens before the + following branch, so that we don't need local temps. */ + if ((mop & MO_SIZE) == MO_32) { + tcg_gen_deposit_i64(o->out, o->out, old, 0, 32); + } else { + tcg_gen_mov_i64(o->out, old); + } + tcg_temp_free_i64(old); + + /* If the comparison was equal, and the LSB of R2 was set, + then we need to flush the TLB (for all cpus). */ + tcg_gen_xori_i64(cc, cc, 1); + tcg_gen_and_i64(cc, cc, o->in2); + tcg_gen_brcondi_i64(TCG_COND_EQ, cc, 0, lab); + tcg_temp_free_i64(cc); + + gen_helper_purge(cpu_env); + gen_set_label(lab); + + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_cvd(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i32 t2 = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t2, o->in1); + gen_helper_cvd(t1, t2); + tcg_temp_free_i32(t2); + tcg_gen_qemu_st64(t1, o->in2, get_mem_index(s)); + tcg_temp_free_i64(t1); + return DISAS_NEXT; +} + +static DisasJumpType op_ct(DisasContext *s, DisasOps *o) +{ + int m3 = get_field(s, m3); + TCGLabel *lab = gen_new_label(); + TCGCond c; + + c = tcg_invert_cond(ltgt_cond[m3]); + if (s->insn->data) { + c = tcg_unsigned_cond(c); + } + tcg_gen_brcond_i64(c, o->in1, o->in2, lab); + + /* Trap. */ + gen_trap(s); + + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_cuXX(DisasContext *s, DisasOps *o) +{ + int m3 = get_field(s, m3); + int r1 = get_field(s, r1); + int r2 = get_field(s, r2); + TCGv_i32 tr1, tr2, chk; + + /* R1 and R2 must both be even. 
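+       (the CU* helpers consume and update address/length register pairs)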
*/ + if ((r1 | r2) & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + if (!s390_has_feat(S390_FEAT_ETF3_ENH)) { + m3 = 0; + } + + tr1 = tcg_const_i32(r1); + tr2 = tcg_const_i32(r2); + chk = tcg_const_i32(m3); + + switch (s->insn->data) { + case 12: + gen_helper_cu12(cc_op, cpu_env, tr1, tr2, chk); + break; + case 14: + gen_helper_cu14(cc_op, cpu_env, tr1, tr2, chk); + break; + case 21: + gen_helper_cu21(cc_op, cpu_env, tr1, tr2, chk); + break; + case 24: + gen_helper_cu24(cc_op, cpu_env, tr1, tr2, chk); + break; + case 41: + gen_helper_cu41(cc_op, cpu_env, tr1, tr2, chk); + break; + case 42: + gen_helper_cu42(cc_op, cpu_env, tr1, tr2, chk); + break; + default: + g_assert_not_reached(); + } + + tcg_temp_free_i32(tr1); + tcg_temp_free_i32(tr2); + tcg_temp_free_i32(chk); + set_cc_static(s); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_diag(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + TCGv_i32 func_code = tcg_const_i32(get_field(s, i2)); + + gen_helper_diag(cpu_env, r1, r3, func_code); + + tcg_temp_free_i32(func_code); + tcg_temp_free_i32(r3); + tcg_temp_free_i32(r1); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_divs32(DisasContext *s, DisasOps *o) +{ + gen_helper_divs32(o->out2, cpu_env, o->in1, o->in2); + return_low128(o->out); + return DISAS_NEXT; +} + +static DisasJumpType op_divu32(DisasContext *s, DisasOps *o) +{ + gen_helper_divu32(o->out2, cpu_env, o->in1, o->in2); + return_low128(o->out); + return DISAS_NEXT; +} + +static DisasJumpType op_divs64(DisasContext *s, DisasOps *o) +{ + gen_helper_divs64(o->out2, cpu_env, o->in1, o->in2); + return_low128(o->out); + return DISAS_NEXT; +} + +static DisasJumpType op_divu64(DisasContext *s, DisasOps *o) +{ + gen_helper_divu64(o->out2, cpu_env, o->out, o->out2, o->in2); + return_low128(o->out); + return DISAS_NEXT; +} + +static DisasJumpType op_deb(DisasContext *s, DisasOps *o) +{ + gen_helper_deb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_ddb(DisasContext *s, DisasOps *o) +{ + gen_helper_ddb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_dxb(DisasContext *s, DisasOps *o) +{ + gen_helper_dxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_ear(DisasContext *s, DisasOps *o) +{ + int r2 = get_field(s, r2); + tcg_gen_ld32u_i64(o->out, cpu_env, offsetof(CPUS390XState, aregs[r2])); + return DISAS_NEXT; +} + +static DisasJumpType op_ecag(DisasContext *s, DisasOps *o) +{ + /* No cache information provided. */ + tcg_gen_movi_i64(o->out, -1); + return DISAS_NEXT; +} + +static DisasJumpType op_efpc(DisasContext *s, DisasOps *o) +{ + tcg_gen_ld32u_i64(o->out, cpu_env, offsetof(CPUS390XState, fpc)); + return DISAS_NEXT; +} + +static DisasJumpType op_epsw(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r2 = get_field(s, r2); + TCGv_i64 t = tcg_temp_new_i64(); + + /* Note the "subsequently" in the PoO, which implies a defined result + if r1 == r2. Thus we cannot defer these writes to an output hook. 
*/ + tcg_gen_shri_i64(t, psw_mask, 32); + store_reg32_i64(r1, t); + if (r2 != 0) { + store_reg32_i64(r2, psw_mask); + } + + tcg_temp_free_i64(t); + return DISAS_NEXT; +} + +static DisasJumpType op_ex(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + TCGv_i32 ilen; + TCGv_i64 v1; + + /* Nested EXECUTE is not allowed. */ + if (unlikely(s->ex_value)) { + gen_program_exception(s, PGM_EXECUTE); + return DISAS_NORETURN; + } + + update_psw_addr(s); + update_cc_op(s); + + if (r1 == 0) { + v1 = tcg_const_i64(0); + } else { + v1 = regs[r1]; + } + + ilen = tcg_const_i32(s->ilen); + gen_helper_ex(cpu_env, ilen, v1, o->in2); + tcg_temp_free_i32(ilen); + + if (r1 == 0) { + tcg_temp_free_i64(v1); + } + + return DISAS_PC_CC_UPDATED; +} + +static DisasJumpType op_fieb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_fieb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_fidb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_fidb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_fixb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, false, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_fixb(o->out, cpu_env, o->in1, o->in2, m34); + return_low128(o->out2); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_flogr(DisasContext *s, DisasOps *o) +{ + /* We'll use the original input for cc computation, since we get to + compare that against 0, which ought to be better than comparing + the real output against 64. It also lets cc_dst be a convenient + temporary during our computation. */ + gen_op_update1_cc_i64(s, CC_OP_FLOGR, o->in2); + + /* R1 = IN ? CLZ(IN) : 64. */ + tcg_gen_clzi_i64(o->out, o->in2, 64); + + /* R1+1 = IN & ~(found bit). Note that we may attempt to shift this + value by 64, which is undefined. But since the shift is 64 iff the + input is zero, we still get the correct result after and'ing. */ + tcg_gen_movi_i64(o->out2, 0x8000000000000000ull); + tcg_gen_shr_i64(o->out2, o->out2, o->out); + tcg_gen_andc_i64(o->out2, cc_dst, o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_icm(DisasContext *s, DisasOps *o) +{ + int m3 = get_field(s, m3); + int pos, len, base = s->insn->data; + TCGv_i64 tmp = tcg_temp_new_i64(); + uint64_t ccm; + + switch (m3) { + case 0xf: + /* Effectively a 32-bit load. */ + tcg_gen_qemu_ld32u(tmp, o->in2, get_mem_index(s)); + len = 32; + goto one_insert; + + case 0xc: + case 0x6: + case 0x3: + /* Effectively a 16-bit load. */ + tcg_gen_qemu_ld16u(tmp, o->in2, get_mem_index(s)); + len = 16; + goto one_insert; + + case 0x8: + case 0x4: + case 0x2: + case 0x1: + /* Effectively an 8-bit load. */ + tcg_gen_qemu_ld8u(tmp, o->in2, get_mem_index(s)); + len = 8; + goto one_insert; + + one_insert: + pos = base + ctz32(m3) * 8; + tcg_gen_deposit_i64(o->out, o->out, tmp, pos, len); + ccm = ((1ull << len) - 1) << pos; + break; + + default: + /* This is going to be a sequence of loads and inserts. 
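+ Walk the mask from its msb: every set bit loads one byte, bumps the address, and deposits the byte 8 bits below the previous one, while ccm accumulates which output bits feed the CC.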
*/ + pos = base + 32 - 8; + ccm = 0; + while (m3) { + if (m3 & 0x8) { + tcg_gen_qemu_ld8u(tmp, o->in2, get_mem_index(s)); + tcg_gen_addi_i64(o->in2, o->in2, 1); + tcg_gen_deposit_i64(o->out, o->out, tmp, pos, 8); + ccm |= 0xffull << pos; + } + m3 = (m3 << 1) & 0xf; + pos -= 8; + } + break; + } + + tcg_gen_movi_i64(tmp, ccm); + gen_op_update2_cc_i64(s, CC_OP_ICM, tmp, o->out); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_insi(DisasContext *s, DisasOps *o) +{ + int shift = s->insn->data & 0xff; + int size = s->insn->data >> 8; + tcg_gen_deposit_i64(o->out, o->in1, o->in2, shift, size); + return DISAS_NEXT; +} + +static DisasJumpType op_ipm(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t1, t2; + + gen_op_calc_cc(s); + t1 = tcg_temp_new_i64(); + tcg_gen_extract_i64(t1, psw_mask, 40, 4); + t2 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(t2, cc_op); + tcg_gen_deposit_i64(t1, t1, t2, 4, 60); + tcg_gen_deposit_i64(o->out, o->out, t1, 24, 8); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_idte(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m4; + + if (s390_has_feat(S390_FEAT_LOCAL_TLB_CLEARING)) { + m4 = tcg_const_i32(get_field(s, m4)); + } else { + m4 = tcg_const_i32(0); + } + gen_helper_idte(cpu_env, o->in1, o->in2, m4); + tcg_temp_free_i32(m4); + return DISAS_NEXT; +} + +static DisasJumpType op_ipte(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m4; + + if (s390_has_feat(S390_FEAT_LOCAL_TLB_CLEARING)) { + m4 = tcg_const_i32(get_field(s, m4)); + } else { + m4 = tcg_const_i32(0); + } + gen_helper_ipte(cpu_env, o->in1, o->in2, m4); + tcg_temp_free_i32(m4); + return DISAS_NEXT; +} + +static DisasJumpType op_iske(DisasContext *s, DisasOps *o) +{ + gen_helper_iske(o->out, cpu_env, o->in2); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_msa(DisasContext *s, DisasOps *o) +{ + int r1 = have_field(s, r1) ? get_field(s, r1) : 0; + int r2 = have_field(s, r2) ? get_field(s, r2) : 0; + int r3 = have_field(s, r3) ? get_field(s, r3) : 0; + TCGv_i32 t_r1, t_r2, t_r3, type; + + switch (s->insn->data) { + case S390_FEAT_TYPE_KMA: + if (r3 == r1 || r3 == r2) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + /* FALL THROUGH */ + case S390_FEAT_TYPE_KMCTR: + if (r3 & 1 || !r3) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + /* FALL THROUGH */ + case S390_FEAT_TYPE_PPNO: + case S390_FEAT_TYPE_KMF: + case S390_FEAT_TYPE_KMC: + case S390_FEAT_TYPE_KMO: + case S390_FEAT_TYPE_KM: + if (r1 & 1 || !r1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + /* FALL THROUGH */ + case S390_FEAT_TYPE_KMAC: + case S390_FEAT_TYPE_KIMD: + case S390_FEAT_TYPE_KLMD: + if (r2 & 1 || !r2) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + /* FALL THROUGH */ + case S390_FEAT_TYPE_PCKMO: + case S390_FEAT_TYPE_PCC: + break; + default: + g_assert_not_reached(); + } + + t_r1 = tcg_const_i32(r1); + t_r2 = tcg_const_i32(r2); + t_r3 = tcg_const_i32(r3); + type = tcg_const_i32(s->insn->data); + gen_helper_msa(cc_op, cpu_env, t_r1, t_r2, t_r3, type); + set_cc_static(s); + tcg_temp_free_i32(t_r1); + tcg_temp_free_i32(t_r2); + tcg_temp_free_i32(t_r3); + tcg_temp_free_i32(type); + return DISAS_NEXT; +} + +static DisasJumpType op_keb(DisasContext *s, DisasOps *o) +{ + gen_helper_keb(cc_op, cpu_env, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_kdb(DisasContext *s, DisasOps *o) +{ + gen_helper_kdb(cc_op, cpu_env, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_kxb(DisasContext *s, DisasOps *o) +{ + gen_helper_kxb(cc_op, cpu_env, o->out, o->out2, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_laa(DisasContext *s, DisasOps *o) +{ + /* The real output is indeed the original value in memory; + recompute the addition for the computation of CC. */ + tcg_gen_atomic_fetch_add_i64(o->in2, o->in2, o->in1, get_mem_index(s), + s->insn->data | MO_ALIGN); + /* However, we need to recompute the addition for setting CC. */ + tcg_gen_add_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_lan(DisasContext *s, DisasOps *o) +{ + /* The real output is indeed the original value in memory; + recompute the AND for the computation of CC. */ + tcg_gen_atomic_fetch_and_i64(o->in2, o->in2, o->in1, get_mem_index(s), + s->insn->data | MO_ALIGN); + /* However, we need to recompute the operation for setting CC. */ + tcg_gen_and_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_lao(DisasContext *s, DisasOps *o) +{ + /* The real output is indeed the original value in memory; + recompute the OR for the computation of CC. */ + tcg_gen_atomic_fetch_or_i64(o->in2, o->in2, o->in1, get_mem_index(s), + s->insn->data | MO_ALIGN); + /* However, we need to recompute the operation for setting CC. */ + tcg_gen_or_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_lax(DisasContext *s, DisasOps *o) +{ + /* The real output is indeed the original value in memory; + recompute the XOR for the computation of CC. */ + tcg_gen_atomic_fetch_xor_i64(o->in2, o->in2, o->in1, get_mem_index(s), + s->insn->data | MO_ALIGN); + /* However, we need to recompute the operation for setting CC.
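+ The atomic op left the pre-existing memory value in in2, so in1 ^ in2 equals the value that was stored back.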
*/ + tcg_gen_xor_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_ldeb(DisasContext *s, DisasOps *o) +{ + gen_helper_ldeb(o->out, cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_ledb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, true, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_ledb(o->out, cpu_env, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_ldxb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, true, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_ldxb(o->out, cpu_env, o->in1, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_lexb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 m34 = fpinst_extract_m34(s, true, true); + + if (!m34) { + return DISAS_NORETURN; + } + gen_helper_lexb(o->out, cpu_env, o->in1, o->in2, m34); + tcg_temp_free_i32(m34); + return DISAS_NEXT; +} + +static DisasJumpType op_lxdb(DisasContext *s, DisasOps *o) +{ + gen_helper_lxdb(o->out, cpu_env, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_lxeb(DisasContext *s, DisasOps *o) +{ + gen_helper_lxeb(o->out, cpu_env, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_lde(DisasContext *s, DisasOps *o) +{ + tcg_gen_shli_i64(o->out, o->in2, 32); + return DISAS_NEXT; +} + +static DisasJumpType op_llgt(DisasContext *s, DisasOps *o) +{ + tcg_gen_andi_i64(o->out, o->in2, 0x7fffffff); + return DISAS_NEXT; +} + +static DisasJumpType op_ld8s(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld8s(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_ld8u(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld8u(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_ld16s(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld16s(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_ld16u(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld16u(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_ld32s(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld32s(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_ld32u(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld32u(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_ld64(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld64(o->out, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_lat(DisasContext *s, DisasOps *o) +{ + TCGLabel *lab = gen_new_label(); + store_reg32_i64(get_field(s, r1), o->in2); + /* The value is stored even in case of trap. */ + tcg_gen_brcondi_i64(TCG_COND_NE, o->in2, 0, lab); + gen_trap(s); + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_lgat(DisasContext *s, DisasOps *o) +{ + TCGLabel *lab = gen_new_label(); + tcg_gen_qemu_ld64(o->out, o->in2, get_mem_index(s)); + /* The value is stored even in case of trap. */ + tcg_gen_brcondi_i64(TCG_COND_NE, o->out, 0, lab); + gen_trap(s); + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_lfhat(DisasContext *s, DisasOps *o) +{ + TCGLabel *lab = gen_new_label(); + store_reg32h_i64(get_field(s, r1), o->in2); + /* The value is stored even in case of trap. 
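+ Only a loaded value of zero then takes the trap, which is delivered as a data exception.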
*/ + tcg_gen_brcondi_i64(TCG_COND_NE, o->in2, 0, lab); + gen_trap(s); + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_llgfat(DisasContext *s, DisasOps *o) +{ + TCGLabel *lab = gen_new_label(); + tcg_gen_qemu_ld32u(o->out, o->in2, get_mem_index(s)); + /* The value is stored even in case of trap. */ + tcg_gen_brcondi_i64(TCG_COND_NE, o->out, 0, lab); + gen_trap(s); + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_llgtat(DisasContext *s, DisasOps *o) +{ + TCGLabel *lab = gen_new_label(); + tcg_gen_andi_i64(o->out, o->in2, 0x7fffffff); + /* The value is stored even in case of trap. */ + tcg_gen_brcondi_i64(TCG_COND_NE, o->out, 0, lab); + gen_trap(s); + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_loc(DisasContext *s, DisasOps *o) +{ + DisasCompare c; + + disas_jcc(s, &c, get_field(s, m3)); + + if (c.is_64) { + tcg_gen_movcond_i64(c.cond, o->out, c.u.s64.a, c.u.s64.b, + o->in2, o->in1); + free_compare(&c); + } else { + TCGv_i32 t32 = tcg_temp_new_i32(); + TCGv_i64 t, z; + + tcg_gen_setcond_i32(c.cond, t32, c.u.s32.a, c.u.s32.b); + free_compare(&c); + + t = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(t, t32); + tcg_temp_free_i32(t32); + + z = tcg_const_i64(0); + tcg_gen_movcond_i64(TCG_COND_NE, o->out, t, z, o->in2, o->in1); + tcg_temp_free_i64(t); + tcg_temp_free_i64(z); + } + + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_lctl(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_lctl(cpu_env, r1, o->in2, r3); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + /* Exit to main loop to reevaluate s390_cpu_exec_interrupt. */ + return DISAS_PC_STALE_NOCHAIN; +} + +static DisasJumpType op_lctlg(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_lctlg(cpu_env, r1, o->in2, r3); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + /* Exit to main loop to reevaluate s390_cpu_exec_interrupt. */ + return DISAS_PC_STALE_NOCHAIN; +} + +static DisasJumpType op_lra(DisasContext *s, DisasOps *o) +{ + gen_helper_lra(o->out, cpu_env, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_lpp(DisasContext *s, DisasOps *o) +{ + tcg_gen_st_i64(o->in2, cpu_env, offsetof(CPUS390XState, pp)); + return DISAS_NEXT; +} + +static DisasJumpType op_lpsw(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t1, t2; + + per_breaking_event(s); + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), + MO_TEUL | MO_ALIGN_8); + tcg_gen_addi_i64(o->in2, o->in2, 4); + tcg_gen_qemu_ld32u(t2, o->in2, get_mem_index(s)); + /* Convert the 32-bit PSW_MASK into the 64-bit PSW_MASK. 
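+ The short-format mask occupies the first word of the 8-byte PSW, so shifting it left by 32 puts those bits at their 64-bit positions; the second word supplies the instruction address.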
*/ + tcg_gen_shli_i64(t1, t1, 32); + gen_helper_load_psw(cpu_env, t1, t2); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + return DISAS_NORETURN; +} + +static DisasJumpType op_lpswe(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t1, t2; + + per_breaking_event(s); + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(t1, o->in2, get_mem_index(s), + MO_TEQ | MO_ALIGN_8); + tcg_gen_addi_i64(o->in2, o->in2, 8); + tcg_gen_qemu_ld64(t2, o->in2, get_mem_index(s)); + gen_helper_load_psw(cpu_env, t1, t2); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + return DISAS_NORETURN; +} +#endif + +static DisasJumpType op_lam(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_lam(cpu_env, r1, o->in2, r3); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_lm32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i64 t1, t2; + + /* Only one register to read. */ + t1 = tcg_temp_new_i64(); + if (unlikely(r1 == r3)) { + tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + store_reg32_i64(r1, t1); + tcg_temp_free(t1); + return DISAS_NEXT; + } + + /* First load the values of the first and last registers to trigger + possible page faults. */ + t2 = tcg_temp_new_i64(); + tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + tcg_gen_addi_i64(t2, o->in2, 4 * ((r3 - r1) & 15)); + tcg_gen_qemu_ld32u(t2, t2, get_mem_index(s)); + store_reg32_i64(r1, t1); + store_reg32_i64(r3, t2); + + /* Only two registers to read. */ + if (((r1 + 1) & 15) == r3) { + tcg_temp_free(t2); + tcg_temp_free(t1); + return DISAS_NEXT; + } + + /* Then load the remaining registers. Page fault can't occur. */ + r3 = (r3 - 1) & 15; + tcg_gen_movi_i64(t2, 4); + while (r1 != r3) { + r1 = (r1 + 1) & 15; + tcg_gen_add_i64(o->in2, o->in2, t2); + tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + store_reg32_i64(r1, t1); + } + tcg_temp_free(t2); + tcg_temp_free(t1); + + return DISAS_NEXT; +} + +static DisasJumpType op_lmh(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i64 t1, t2; + + /* Only one register to read. */ + t1 = tcg_temp_new_i64(); + if (unlikely(r1 == r3)) { + tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + store_reg32h_i64(r1, t1); + tcg_temp_free(t1); + return DISAS_NEXT; + } + + /* First load the values of the first and last registers to trigger + possible page faults. */ + t2 = tcg_temp_new_i64(); + tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + tcg_gen_addi_i64(t2, o->in2, 4 * ((r3 - r1) & 15)); + tcg_gen_qemu_ld32u(t2, t2, get_mem_index(s)); + store_reg32h_i64(r1, t1); + store_reg32h_i64(r3, t2); + + /* Only two registers to read. */ + if (((r1 + 1) & 15) == r3) { + tcg_temp_free(t2); + tcg_temp_free(t1); + return DISAS_NEXT; + } + + /* Then load the remaining registers. Page fault can't occur. */ + r3 = (r3 - 1) & 15; + tcg_gen_movi_i64(t2, 4); + while (r1 != r3) { + r1 = (r1 + 1) & 15; + tcg_gen_add_i64(o->in2, o->in2, t2); + tcg_gen_qemu_ld32u(t1, o->in2, get_mem_index(s)); + store_reg32h_i64(r1, t1); + } + tcg_temp_free(t2); + tcg_temp_free(t1); + + return DISAS_NEXT; +} + +static DisasJumpType op_lm64(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i64 t1, t2; + + /* Only one register to read. 
*/ + if (unlikely(r1 == r3)) { + tcg_gen_qemu_ld64(regs[r1], o->in2, get_mem_index(s)); + return DISAS_NEXT; + } + + /* First load the values of the first and last registers to trigger + possible page faults. */ + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_qemu_ld64(t1, o->in2, get_mem_index(s)); + tcg_gen_addi_i64(t2, o->in2, 8 * ((r3 - r1) & 15)); + tcg_gen_qemu_ld64(regs[r3], t2, get_mem_index(s)); + tcg_gen_mov_i64(regs[r1], t1); + tcg_temp_free(t2); + + /* Only two registers to read. */ + if (((r1 + 1) & 15) == r3) { + tcg_temp_free(t1); + return DISAS_NEXT; + } + + /* Then load the remaining registers. Page fault can't occur. */ + r3 = (r3 - 1) & 15; + tcg_gen_movi_i64(t1, 8); + while (r1 != r3) { + r1 = (r1 + 1) & 15; + tcg_gen_add_i64(o->in2, o->in2, t1); + tcg_gen_qemu_ld64(regs[r1], o->in2, get_mem_index(s)); + } + tcg_temp_free(t1); + + return DISAS_NEXT; +} + +static DisasJumpType op_lpd(DisasContext *s, DisasOps *o) +{ + TCGv_i64 a1, a2; + MemOp mop = s->insn->data; + + /* In a parallel context, stop the world and single step. */ + if (tb_cflags(s->base.tb) & CF_PARALLEL) { + update_psw_addr(s); + update_cc_op(s); + gen_exception(EXCP_ATOMIC); + return DISAS_NORETURN; + } + + /* In a serial context, perform the two loads ... */ + a1 = get_address(s, 0, get_field(s, b1), get_field(s, d1)); + a2 = get_address(s, 0, get_field(s, b2), get_field(s, d2)); + tcg_gen_qemu_ld_i64(o->out, a1, get_mem_index(s), mop | MO_ALIGN); + tcg_gen_qemu_ld_i64(o->out2, a2, get_mem_index(s), mop | MO_ALIGN); + tcg_temp_free_i64(a1); + tcg_temp_free_i64(a2); + + /* ... and indicate that we performed them while interlocked. */ + gen_op_movi_cc(s, 0); + return DISAS_NEXT; +} + +static DisasJumpType op_lpq(DisasContext *s, DisasOps *o) +{ + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_lpq(o->out, cpu_env, o->in2); + } else if (HAVE_ATOMIC128) { + gen_helper_lpq_parallel(o->out, cpu_env, o->in2); + } else { + gen_helper_exit_atomic(cpu_env); + return DISAS_NORETURN; + } + return_low128(o->out2); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_lura(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld_tl(o->out, o->in2, MMU_REAL_IDX, s->insn->data); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_lzrb(DisasContext *s, DisasOps *o) +{ + tcg_gen_andi_i64(o->out, o->in2, -256); + return DISAS_NEXT; +} + +static DisasJumpType op_lcbb(DisasContext *s, DisasOps *o) +{ + const int64_t block_size = (1ull << (get_field(s, m3) + 6)); + + if (get_field(s, m3) > 6) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tcg_gen_ori_i64(o->addr1, o->addr1, -block_size); + tcg_gen_neg_i64(o->addr1, o->addr1); + tcg_gen_movi_i64(o->out, 16); + tcg_gen_umin_i64(o->out, o->out, o->addr1); + gen_op_update1_cc_i64(s, CC_OP_LCBB, o->out); + return DISAS_NEXT; +} + +static DisasJumpType op_mc(DisasContext *s, DisasOps *o) +{ +#if !defined(CONFIG_USER_ONLY) + TCGv_i32 i2; +#endif + const uint16_t monitor_class = get_field(s, i2); + + if (monitor_class & 0xff00) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + +#if !defined(CONFIG_USER_ONLY) + i2 = tcg_const_i32(monitor_class); + gen_helper_monitor_call(cpu_env, o->addr1, i2); + tcg_temp_free_i32(i2); +#endif + /* Defaults to a NOP. 
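+ In a user-only build there is no monitor support, so once the class check passes the instruction has no effect.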
*/ + return DISAS_NEXT; +} + +static DisasJumpType op_mov2(DisasContext *s, DisasOps *o) +{ + o->out = o->in2; + o->g_out = o->g_in2; + o->in2 = NULL; + o->g_in2 = false; + return DISAS_NEXT; +} + +static DisasJumpType op_mov2e(DisasContext *s, DisasOps *o) +{ + int b2 = get_field(s, b2); + TCGv ar1 = tcg_temp_new_i64(); + + o->out = o->in2; + o->g_out = o->g_in2; + o->in2 = NULL; + o->g_in2 = false; + + switch (s->base.tb->flags & FLAG_MASK_ASC) { + case PSW_ASC_PRIMARY >> FLAG_MASK_PSW_SHIFT: + tcg_gen_movi_i64(ar1, 0); + break; + case PSW_ASC_ACCREG >> FLAG_MASK_PSW_SHIFT: + tcg_gen_movi_i64(ar1, 1); + break; + case PSW_ASC_SECONDARY >> FLAG_MASK_PSW_SHIFT: + if (b2) { + tcg_gen_ld32u_i64(ar1, cpu_env, offsetof(CPUS390XState, aregs[b2])); + } else { + tcg_gen_movi_i64(ar1, 0); + } + break; + case PSW_ASC_HOME >> FLAG_MASK_PSW_SHIFT: + tcg_gen_movi_i64(ar1, 2); + break; + } + + tcg_gen_st32_i64(ar1, cpu_env, offsetof(CPUS390XState, aregs[1])); + tcg_temp_free_i64(ar1); + + return DISAS_NEXT; +} + +static DisasJumpType op_movx(DisasContext *s, DisasOps *o) +{ + o->out = o->in1; + o->out2 = o->in2; + o->g_out = o->g_in1; + o->g_out2 = o->g_in2; + o->in1 = NULL; + o->in2 = NULL; + o->g_in1 = o->g_in2 = false; + return DISAS_NEXT; +} + +static DisasJumpType op_mvc(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_mvc(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_mvcin(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_mvcin(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_mvcl(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r2 = get_field(s, r2); + TCGv_i32 t1, t2; + + /* r1 and r2 must be even. */ + if (r1 & 1 || r2 & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t1 = tcg_const_i32(r1); + t2 = tcg_const_i32(r2); + gen_helper_mvcl(cc_op, cpu_env, t1, t2); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mvcle(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i32 t1, t3; + + /* r1 and r3 must be even. */ + if (r1 & 1 || r3 & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t1 = tcg_const_i32(r1); + t3 = tcg_const_i32(r3); + gen_helper_mvcle(cc_op, cpu_env, t1, o->in2, t3); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t3); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mvclu(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i32 t1, t3; + + /* r1 and r3 must be even. 
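+ As with MVCLE above, each even register carries an address and its odd partner the remaining length.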
*/ + if (r1 & 1 || r3 & 1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t1 = tcg_const_i32(r1); + t3 = tcg_const_i32(r3); + gen_helper_mvclu(cc_op, cpu_env, t1, o->in2, t3); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t3); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mvcos(DisasContext *s, DisasOps *o) +{ + int r3 = get_field(s, r3); + gen_helper_mvcos(cc_op, cpu_env, o->addr1, o->in2, regs[r3]); + set_cc_static(s); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_mvcp(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, l1); + gen_helper_mvcp(cc_op, cpu_env, regs[r1], o->addr1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mvcs(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, l1); + gen_helper_mvcs(cc_op, cpu_env, regs[r1], o->addr1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_mvn(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_mvn(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_mvo(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_mvo(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_mvpg(DisasContext *s, DisasOps *o) +{ + TCGv_i32 t1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 t2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_mvpg(cc_op, cpu_env, regs[0], t1, t2); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mvst(DisasContext *s, DisasOps *o) +{ + TCGv_i32 t1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 t2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_mvst(cc_op, cpu_env, t1, t2); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mvz(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_mvz(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_mul(DisasContext *s, DisasOps *o) +{ + tcg_gen_mul_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_mul128(DisasContext *s, DisasOps *o) +{ + tcg_gen_mulu2_i64(o->out2, o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_muls128(DisasContext *s, DisasOps *o) +{ + tcg_gen_muls2_i64(o->out2, o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_meeb(DisasContext *s, DisasOps *o) +{ + gen_helper_meeb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_mdeb(DisasContext *s, DisasOps *o) +{ + gen_helper_mdeb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_mdb(DisasContext *s, DisasOps *o) +{ + gen_helper_mdb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_mxb(DisasContext *s, DisasOps *o) +{ + gen_helper_mxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_mxdb(DisasContext *s, DisasOps *o) +{ + gen_helper_mxdb(o->out, cpu_env, o->out, o->out2, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_maeb(DisasContext *s, DisasOps *o) +{ + TCGv_i64 r3 = load_freg32_i64(get_field(s, r3)); + gen_helper_maeb(o->out, cpu_env, o->in1, o->in2, r3); 
+ tcg_temp_free_i64(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_madb(DisasContext *s, DisasOps *o) +{ + TCGv_i64 r3 = load_freg(get_field(s, r3)); + gen_helper_madb(o->out, cpu_env, o->in1, o->in2, r3); + tcg_temp_free_i64(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_mseb(DisasContext *s, DisasOps *o) +{ + TCGv_i64 r3 = load_freg32_i64(get_field(s, r3)); + gen_helper_mseb(o->out, cpu_env, o->in1, o->in2, r3); + tcg_temp_free_i64(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_msdb(DisasContext *s, DisasOps *o) +{ + TCGv_i64 r3 = load_freg(get_field(s, r3)); + gen_helper_msdb(o->out, cpu_env, o->in1, o->in2, r3); + tcg_temp_free_i64(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_nabs(DisasContext *s, DisasOps *o) +{ + TCGv_i64 z, n; + z = tcg_const_i64(0); + n = tcg_temp_new_i64(); + tcg_gen_neg_i64(n, o->in2); + tcg_gen_movcond_i64(TCG_COND_GE, o->out, o->in2, z, n, o->in2); + tcg_temp_free_i64(n); + tcg_temp_free_i64(z); + return DISAS_NEXT; +} + +static DisasJumpType op_nabsf32(DisasContext *s, DisasOps *o) +{ + tcg_gen_ori_i64(o->out, o->in2, 0x80000000ull); + return DISAS_NEXT; +} + +static DisasJumpType op_nabsf64(DisasContext *s, DisasOps *o) +{ + tcg_gen_ori_i64(o->out, o->in2, 0x8000000000000000ull); + return DISAS_NEXT; +} + +static DisasJumpType op_nabsf128(DisasContext *s, DisasOps *o) +{ + tcg_gen_ori_i64(o->out, o->in1, 0x8000000000000000ull); + tcg_gen_mov_i64(o->out2, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_nc(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_nc(cc_op, cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_neg(DisasContext *s, DisasOps *o) +{ + tcg_gen_neg_i64(o->out, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_negf32(DisasContext *s, DisasOps *o) +{ + tcg_gen_xori_i64(o->out, o->in2, 0x80000000ull); + return DISAS_NEXT; +} + +static DisasJumpType op_negf64(DisasContext *s, DisasOps *o) +{ + tcg_gen_xori_i64(o->out, o->in2, 0x8000000000000000ull); + return DISAS_NEXT; +} + +static DisasJumpType op_negf128(DisasContext *s, DisasOps *o) +{ + tcg_gen_xori_i64(o->out, o->in1, 0x8000000000000000ull); + tcg_gen_mov_i64(o->out2, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_oc(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_oc(cc_op, cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_or(DisasContext *s, DisasOps *o) +{ + tcg_gen_or_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_ori(DisasContext *s, DisasOps *o) +{ + int shift = s->insn->data & 0xff; + int size = s->insn->data >> 8; + uint64_t mask = ((1ull << size) - 1) << shift; + + assert(!o->g_in2); + tcg_gen_shli_i64(o->in2, o->in2, shift); + tcg_gen_or_i64(o->out, o->in1, o->in2); + + /* Produce the CC from only the bits manipulated. */ + tcg_gen_andi_i64(cc_dst, o->out, mask); + set_cc_nz_u64(s, cc_dst); + return DISAS_NEXT; +} + +static DisasJumpType op_oi(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + + if (!s390_has_feat(S390_FEAT_INTERLOCKED_ACCESS_2)) { + tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data); + } else { + /* Perform the atomic operation in memory. 
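+ fetch_or hands back the previous memory value in in1, while the interlocked store writes old | in2.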
*/ + tcg_gen_atomic_fetch_or_i64(o->in1, o->addr1, o->in2, get_mem_index(s), + s->insn->data); + } + + /* Recompute also for atomic case: needed for setting CC. */ + tcg_gen_or_i64(o->out, o->in1, o->in2); + + if (!s390_has_feat(S390_FEAT_INTERLOCKED_ACCESS_2)) { + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data); + } + return DISAS_NEXT; +} + +static DisasJumpType op_pack(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_pack(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_pka(DisasContext *s, DisasOps *o) +{ + int l2 = get_field(s, l2) + 1; + TCGv_i32 l; + + /* The length must not exceed 32 bytes. */ + if (l2 > 32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + l = tcg_const_i32(l2); + gen_helper_pka(cpu_env, o->addr1, o->in2, l); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_pku(DisasContext *s, DisasOps *o) +{ + int l2 = get_field(s, l2) + 1; + TCGv_i32 l; + + /* The length must be even and should not exceed 64 bytes. */ + if ((l2 & 1) || (l2 > 64)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + l = tcg_const_i32(l2); + gen_helper_pku(cpu_env, o->addr1, o->in2, l); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_popcnt(DisasContext *s, DisasOps *o) +{ + gen_helper_popcnt(o->out, o->in2); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_ptlb(DisasContext *s, DisasOps *o) +{ + gen_helper_ptlb(cpu_env); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_risbg(DisasContext *s, DisasOps *o) +{ + int i3 = get_field(s, i3); + int i4 = get_field(s, i4); + int i5 = get_field(s, i5); + int do_zero = i4 & 0x80; + uint64_t mask, imask, pmask; + int pos, len, rot; + + /* Adjust the arguments for the specific insn. */ + switch (s->fields.op2) { + case 0x55: /* risbg */ + case 0x59: /* risbgn */ + i3 &= 63; + i4 &= 63; + pmask = ~0; + break; + case 0x5d: /* risbhg */ + i3 &= 31; + i4 &= 31; + pmask = 0xffffffff00000000ull; + break; + case 0x51: /* risblg */ + i3 = (i3 & 31) + 32; + i4 = (i4 & 31) + 32; + pmask = 0x00000000ffffffffull; + break; + default: + g_assert_not_reached(); + } + + /* MASK is the set of bits to be inserted from R2. */ + if (i3 <= i4) { + /* [0...i3---i4...63] */ + mask = (-1ull >> i3) & (-1ull << (63 - i4)); + } else { + /* [0---i4...i3---63] */ + mask = (-1ull >> i3) | (-1ull << (63 - i4)); + } + /* For RISBLG/RISBHG, the wrapping is limited to the high/low doubleword. */ + mask &= pmask; + + /* IMASK is the set of bits to be kept from R1. In the case of the high/low + insns, we need to keep the other half of the register. */ + imask = ~mask | ~pmask; + if (do_zero) { + imask = ~pmask; + } + + len = i4 - i3 + 1; + pos = 63 - i4; + rot = i5 & 63; + + /* In some cases we can implement this with extract. */ + if (imask == 0 && pos == 0 && len > 0 && len <= rot) { + tcg_gen_extract_i64(o->out, o->in2, 64 - rot, len); + return DISAS_NEXT; + } + + /* In some cases we can implement this with deposit. */ + if (len > 0 && (imask == 0 || ~mask == imask)) { + /* Note that we rotate the bits to be inserted to the lsb, not to + the position as described in the PoO. */ + rot = (rot - pos) & 63; + } else { + pos = -1; + } + + /* Rotate the input as necessary. */ + tcg_gen_rotli_i64(o->in2, o->in2, rot); + + /* Insert the selected bits into the output. 
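+ A valid deposit position lets a single deposit (zeroing or merging) do the job; otherwise mask both operands and OR them together.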
*/ + if (pos >= 0) { + if (imask == 0) { + tcg_gen_deposit_z_i64(o->out, o->in2, pos, len); + } else { + tcg_gen_deposit_i64(o->out, o->out, o->in2, pos, len); + } + } else if (imask == 0) { + tcg_gen_andi_i64(o->out, o->in2, mask); + } else { + tcg_gen_andi_i64(o->in2, o->in2, mask); + tcg_gen_andi_i64(o->out, o->out, imask); + tcg_gen_or_i64(o->out, o->out, o->in2); + } + return DISAS_NEXT; +} + +static DisasJumpType op_rosbg(DisasContext *s, DisasOps *o) +{ + int i3 = get_field(s, i3); + int i4 = get_field(s, i4); + int i5 = get_field(s, i5); + uint64_t mask; + + /* If this is a test-only form, arrange to discard the result. */ + if (i3 & 0x80) { + o->out = tcg_temp_new_i64(); + o->g_out = false; + } + + i3 &= 63; + i4 &= 63; + i5 &= 63; + + /* MASK is the set of bits to be operated on from R2. + Take care for I3/I4 wraparound. */ + mask = ~0ull >> i3; + if (i3 <= i4) { + mask ^= ~0ull >> i4 >> 1; + } else { + mask |= ~(~0ull >> i4 >> 1); + } + + /* Rotate the input as necessary. */ + tcg_gen_rotli_i64(o->in2, o->in2, i5); + + /* Operate. */ + switch (s->fields.op2) { + case 0x54: /* AND */ + tcg_gen_ori_i64(o->in2, o->in2, ~mask); + tcg_gen_and_i64(o->out, o->out, o->in2); + break; + case 0x56: /* OR */ + tcg_gen_andi_i64(o->in2, o->in2, mask); + tcg_gen_or_i64(o->out, o->out, o->in2); + break; + case 0x57: /* XOR */ + tcg_gen_andi_i64(o->in2, o->in2, mask); + tcg_gen_xor_i64(o->out, o->out, o->in2); + break; + default: + abort(); + } + + /* Set the CC. */ + tcg_gen_andi_i64(cc_dst, o->out, mask); + set_cc_nz_u64(s, cc_dst); + return DISAS_NEXT; +} + +static DisasJumpType op_rev16(DisasContext *s, DisasOps *o) +{ + tcg_gen_bswap16_i64(o->out, o->in2, TCG_BSWAP_IZ | TCG_BSWAP_OZ); + return DISAS_NEXT; +} + +static DisasJumpType op_rev32(DisasContext *s, DisasOps *o) +{ + tcg_gen_bswap32_i64(o->out, o->in2, TCG_BSWAP_IZ | TCG_BSWAP_OZ); + return DISAS_NEXT; +} + +static DisasJumpType op_rev64(DisasContext *s, DisasOps *o) +{ + tcg_gen_bswap64_i64(o->out, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_rll32(DisasContext *s, DisasOps *o) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 to = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t1, o->in1); + tcg_gen_extrl_i64_i32(t2, o->in2); + tcg_gen_rotl_i32(to, t1, t2); + tcg_gen_extu_i32_i64(o->out, to); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(to); + return DISAS_NEXT; +} + +static DisasJumpType op_rll64(DisasContext *s, DisasOps *o) +{ + tcg_gen_rotl_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_rrbe(DisasContext *s, DisasOps *o) +{ + gen_helper_rrbe(cc_op, cpu_env, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_sacf(DisasContext *s, DisasOps *o) +{ + gen_helper_sacf(cpu_env, o->in2); + /* Addressing mode has changed, so end the block. */ + return DISAS_PC_STALE; +} +#endif + +static DisasJumpType op_sam(DisasContext *s, DisasOps *o) +{ + int sam = s->insn->data; + TCGv_i64 tsam; + uint64_t mask; + + switch (sam) { + case 0: + mask = 0xffffff; + break; + case 1: + mask = 0x7fffffff; + break; + default: + mask = -1; + break; + } + + /* Bizarre but true, we check the address of the current insn for the + specification exception, not the next to be executed. Thus the PoO + documents that Bad Things Happen two bytes before the end. 
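+ For example, switching to 24-bit mode while executing above 16MB leaves address bits outside the new mask and raises the specification exception.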
*/ + if (s->base.pc_next & ~mask) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + s->pc_tmp &= mask; + + tsam = tcg_const_i64(sam); + tcg_gen_deposit_i64(psw_mask, psw_mask, tsam, 31, 2); + tcg_temp_free_i64(tsam); + + /* Always exit the TB, since we (may have) changed execution mode. */ + return DISAS_PC_STALE; +} + +static DisasJumpType op_sar(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + tcg_gen_st32_i64(o->in2, cpu_env, offsetof(CPUS390XState, aregs[r1])); + return DISAS_NEXT; +} + +static DisasJumpType op_seb(DisasContext *s, DisasOps *o) +{ + gen_helper_seb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sdb(DisasContext *s, DisasOps *o) +{ + gen_helper_sdb(o->out, cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sxb(DisasContext *s, DisasOps *o) +{ + gen_helper_sxb(o->out, cpu_env, o->out, o->out2, o->in1, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +static DisasJumpType op_sqeb(DisasContext *s, DisasOps *o) +{ + gen_helper_sqeb(o->out, cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sqdb(DisasContext *s, DisasOps *o) +{ + gen_helper_sqdb(o->out, cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sqxb(DisasContext *s, DisasOps *o) +{ + gen_helper_sqxb(o->out, cpu_env, o->in1, o->in2); + return_low128(o->out2); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_servc(DisasContext *s, DisasOps *o) +{ + gen_helper_servc(cc_op, cpu_env, o->in2, o->in1); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_sigp(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_sigp(cc_op, cpu_env, o->in2, r1, r3); + set_cc_static(s); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_soc(DisasContext *s, DisasOps *o) +{ + DisasCompare c; + TCGv_i64 a, h; + TCGLabel *lab; + int r1; + + disas_jcc(s, &c, get_field(s, m3)); + + /* We want to store when the condition is fulfilled, so branch + out when it's not */ + c.cond = tcg_invert_cond(c.cond); + + lab = gen_new_label(); + if (c.is_64) { + tcg_gen_brcond_i64(c.cond, c.u.s64.a, c.u.s64.b, lab); + } else { + tcg_gen_brcond_i32(c.cond, c.u.s32.a, c.u.s32.b, lab); + } + free_compare(&c); + + r1 = get_field(s, r1); + a = get_address(s, 0, get_field(s, b2), get_field(s, d2)); + switch (s->insn->data) { + case 1: /* STOCG */ + tcg_gen_qemu_st64(regs[r1], a, get_mem_index(s)); + break; + case 0: /* STOC */ + tcg_gen_qemu_st32(regs[r1], a, get_mem_index(s)); + break; + case 2: /* STOCFH */ + h = tcg_temp_new_i64(); + tcg_gen_shri_i64(h, regs[r1], 32); + tcg_gen_qemu_st32(h, a, get_mem_index(s)); + tcg_temp_free_i64(h); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_i64(a); + + gen_set_label(lab); + return DISAS_NEXT; +} + +static DisasJumpType op_sla(DisasContext *s, DisasOps *o) +{ + uint64_t sign = 1ull << s->insn->data; + enum cc_op cco = s->insn->data == 31 ? CC_OP_SLA_32 : CC_OP_SLA_64; + gen_op_update2_cc_i64(s, cco, o->in1, o->in2); + tcg_gen_shl_i64(o->out, o->in1, o->in2); + /* The arithmetic left shift is curious in that it does not affect + the sign bit. Copy that over from the source unchanged. 
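+ For example, SLA by 1 on 0x80000001 shifts the low word to 0x00000002, and restoring the saved sign bit gives 0x80000002.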
*/ + tcg_gen_andi_i64(o->out, o->out, ~sign); + tcg_gen_andi_i64(o->in1, o->in1, sign); + tcg_gen_or_i64(o->out, o->out, o->in1); + return DISAS_NEXT; +} + +static DisasJumpType op_sll(DisasContext *s, DisasOps *o) +{ + tcg_gen_shl_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sra(DisasContext *s, DisasOps *o) +{ + tcg_gen_sar_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_srl(DisasContext *s, DisasOps *o) +{ + tcg_gen_shr_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sfpc(DisasContext *s, DisasOps *o) +{ + gen_helper_sfpc(cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sfas(DisasContext *s, DisasOps *o) +{ + gen_helper_sfas(cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_srnm(DisasContext *s, DisasOps *o) +{ + /* Bits other than 62 and 63 are ignored. Bit 29 is set to zero. */ + tcg_gen_andi_i64(o->addr1, o->addr1, 0x3ull); + gen_helper_srnm(cpu_env, o->addr1); + return DISAS_NEXT; +} + +static DisasJumpType op_srnmb(DisasContext *s, DisasOps *o) +{ + /* Bits 0-55 are ignored. */ + tcg_gen_andi_i64(o->addr1, o->addr1, 0xffull); + gen_helper_srnm(cpu_env, o->addr1); + return DISAS_NEXT; +} + +static DisasJumpType op_srnmt(DisasContext *s, DisasOps *o) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + + /* Bits other than 61-63 are ignored. */ + tcg_gen_andi_i64(o->addr1, o->addr1, 0x7ull); + + /* No need to call a helper, we don't implement dfp */ + tcg_gen_ld32u_i64(tmp, cpu_env, offsetof(CPUS390XState, fpc)); + tcg_gen_deposit_i64(tmp, tmp, o->addr1, 4, 3); + tcg_gen_st32_i64(tmp, cpu_env, offsetof(CPUS390XState, fpc)); + + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_spm(DisasContext *s, DisasOps *o) +{ + tcg_gen_extrl_i64_i32(cc_op, o->in1); + tcg_gen_extract_i32(cc_op, cc_op, 28, 2); + set_cc_static(s); + + tcg_gen_shri_i64(o->in1, o->in1, 24); + tcg_gen_deposit_i64(psw_mask, psw_mask, o->in1, PSW_SHIFT_MASK_PM, 4); + return DISAS_NEXT; +} + +static DisasJumpType op_ectg(DisasContext *s, DisasOps *o) +{ + int b1 = get_field(s, b1); + int d1 = get_field(s, d1); + int b2 = get_field(s, b2); + int d2 = get_field(s, d2); + int r3 = get_field(s, r3); + TCGv_i64 tmp = tcg_temp_new_i64(); + + /* fetch all operands first */ + o->in1 = tcg_temp_new_i64(); + tcg_gen_addi_i64(o->in1, regs[b1], d1); + o->in2 = tcg_temp_new_i64(); + tcg_gen_addi_i64(o->in2, regs[b2], d2); + o->addr1 = tcg_temp_new_i64(); + gen_addi_and_wrap_i64(s, o->addr1, regs[r3], 0); + + /* load the third operand into r3 before modifying anything */ + tcg_gen_qemu_ld64(regs[r3], o->addr1, get_mem_index(s)); + + /* subtract CPU timer from first operand and store in GR0 */ + gen_helper_stpt(tmp, cpu_env); + tcg_gen_sub_i64(regs[0], o->in1, tmp); + + /* store second operand in GR1 */ + tcg_gen_mov_i64(regs[1], o->in2); + + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_spka(DisasContext *s, DisasOps *o) +{ + tcg_gen_shri_i64(o->in2, o->in2, 4); + tcg_gen_deposit_i64(psw_mask, psw_mask, o->in2, PSW_SHIFT_KEY, 4); + return DISAS_NEXT; +} + +static DisasJumpType op_sske(DisasContext *s, DisasOps *o) +{ + gen_helper_sske(cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_ssm(DisasContext *s, DisasOps *o) +{ + tcg_gen_deposit_i64(psw_mask, psw_mask, o->in2, 56, 8); + /* Exit to main loop to reevaluate s390_cpu_exec_interrupt.
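+ The new system mask may unmask a pending interrupt, so the TB must not chain straight into the next one.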
*/ + return DISAS_PC_STALE_NOCHAIN; +} + +static DisasJumpType op_stap(DisasContext *s, DisasOps *o) +{ + tcg_gen_ld32u_i64(o->out, cpu_env, offsetof(CPUS390XState, core_id)); + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_stck(DisasContext *s, DisasOps *o) +{ + gen_helper_stck(o->out, cpu_env); + /* ??? We don't implement clock states. */ + gen_op_movi_cc(s, 0); + return DISAS_NEXT; +} + +static DisasJumpType op_stcke(DisasContext *s, DisasOps *o) +{ + TCGv_i64 c1 = tcg_temp_new_i64(); + TCGv_i64 c2 = tcg_temp_new_i64(); + TCGv_i64 todpr = tcg_temp_new_i64(); + gen_helper_stck(c1, cpu_env); + /* 16-bit value stored in a uint32_t (only valid bits set) */ + tcg_gen_ld32u_i64(todpr, cpu_env, offsetof(CPUS390XState, todpr)); + /* Shift the 64-bit value into its place as a zero-extended + 104-bit value. Note that "bit positions 64-103 are always + non-zero so that they compare differently to STCK"; we set + the least significant bit to 1. */ + tcg_gen_shli_i64(c2, c1, 56); + tcg_gen_shri_i64(c1, c1, 8); + tcg_gen_ori_i64(c2, c2, 0x10000); + tcg_gen_or_i64(c2, c2, todpr); + tcg_gen_qemu_st64(c1, o->in2, get_mem_index(s)); + tcg_gen_addi_i64(o->in2, o->in2, 8); + tcg_gen_qemu_st64(c2, o->in2, get_mem_index(s)); + tcg_temp_free_i64(c1); + tcg_temp_free_i64(c2); + tcg_temp_free_i64(todpr); + /* ??? We don't implement clock states. */ + gen_op_movi_cc(s, 0); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_sck(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_ld_i64(o->in1, o->addr1, get_mem_index(s), MO_TEQ | MO_ALIGN); + gen_helper_sck(cc_op, cpu_env, o->in1); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_sckc(DisasContext *s, DisasOps *o) +{ + gen_helper_sckc(cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_sckpf(DisasContext *s, DisasOps *o) +{ + gen_helper_sckpf(cpu_env, regs[0]); + return DISAS_NEXT; +} + +static DisasJumpType op_stckc(DisasContext *s, DisasOps *o) +{ + gen_helper_stckc(o->out, cpu_env); + return DISAS_NEXT; +} + +static DisasJumpType op_stctg(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_stctg(cpu_env, r1, o->in2, r3); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_stctl(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_stctl(cpu_env, r1, o->in2, r3); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_stidp(DisasContext *s, DisasOps *o) +{ + tcg_gen_ld_i64(o->out, cpu_env, offsetof(CPUS390XState, cpuid)); + return DISAS_NEXT; +} + +static DisasJumpType op_spt(DisasContext *s, DisasOps *o) +{ + gen_helper_spt(cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_stfl(DisasContext *s, DisasOps *o) +{ + gen_helper_stfl(cpu_env); + return DISAS_NEXT; +} + +static DisasJumpType op_stpt(DisasContext *s, DisasOps *o) +{ + gen_helper_stpt(o->out, cpu_env); + return DISAS_NEXT; +} + +static DisasJumpType op_stsi(DisasContext *s, DisasOps *o) +{ + gen_helper_stsi(cc_op, cpu_env, o->in2, regs[0], regs[1]); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_spx(DisasContext *s, DisasOps *o) +{ + gen_helper_spx(cpu_env, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_xsch(DisasContext *s, DisasOps *o) +{ + gen_helper_xsch(cpu_env, regs[1]); + set_cc_static(s); + return
DISAS_NEXT; +} + +static DisasJumpType op_csch(DisasContext *s, DisasOps *o) +{ + gen_helper_csch(cpu_env, regs[1]); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_hsch(DisasContext *s, DisasOps *o) +{ + gen_helper_hsch(cpu_env, regs[1]); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_msch(DisasContext *s, DisasOps *o) +{ + gen_helper_msch(cpu_env, regs[1], o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_rchp(DisasContext *s, DisasOps *o) +{ + gen_helper_rchp(cpu_env, regs[1]); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_rsch(DisasContext *s, DisasOps *o) +{ + gen_helper_rsch(cpu_env, regs[1]); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_sal(DisasContext *s, DisasOps *o) +{ + gen_helper_sal(cpu_env, regs[1]); + return DISAS_NEXT; +} + +static DisasJumpType op_schm(DisasContext *s, DisasOps *o) +{ + gen_helper_schm(cpu_env, regs[1], regs[2], o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_siga(DisasContext *s, DisasOps *o) +{ + /* From KVM code: Not provided, set CC = 3 for subchannel not operational */ + gen_op_movi_cc(s, 3); + return DISAS_NEXT; +} + +static DisasJumpType op_stcps(DisasContext *s, DisasOps *o) +{ + /* The instruction is suppressed if not provided. */ + return DISAS_NEXT; +} + +static DisasJumpType op_ssch(DisasContext *s, DisasOps *o) +{ + gen_helper_ssch(cpu_env, regs[1], o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_stsch(DisasContext *s, DisasOps *o) +{ + gen_helper_stsch(cpu_env, regs[1], o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_stcrw(DisasContext *s, DisasOps *o) +{ + gen_helper_stcrw(cpu_env, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tpi(DisasContext *s, DisasOps *o) +{ + gen_helper_tpi(cc_op, cpu_env, o->addr1); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tsch(DisasContext *s, DisasOps *o) +{ + gen_helper_tsch(cpu_env, regs[1], o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_chsc(DisasContext *s, DisasOps *o) +{ + gen_helper_chsc(cpu_env, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_stpx(DisasContext *s, DisasOps *o) +{ + tcg_gen_ld_i64(o->out, cpu_env, offsetof(CPUS390XState, psa)); + tcg_gen_andi_i64(o->out, o->out, 0x7fffe000); + return DISAS_NEXT; +} + +static DisasJumpType op_stnosm(DisasContext *s, DisasOps *o) +{ + uint64_t i2 = get_field(s, i2); + TCGv_i64 t; + + /* It is important to do what the instruction name says: STORE THEN. + If we let the output hook perform the store then if we fault and + restart, we'll have the wrong SYSTEM MASK in place. */ + t = tcg_temp_new_i64(); + tcg_gen_shri_i64(t, psw_mask, 56); + tcg_gen_qemu_st8(t, o->addr1, get_mem_index(s)); + tcg_temp_free_i64(t); + + if (s->fields.op == 0xac) { + tcg_gen_andi_i64(psw_mask, psw_mask, + (i2 << 56) | 0x00ffffffffffffffull); + } else { + tcg_gen_ori_i64(psw_mask, psw_mask, i2 << 56); + } + + /* Exit to main loop to reevaluate s390_cpu_exec_interrupt. 
*/ + return DISAS_PC_STALE_NOCHAIN; +} + +static DisasJumpType op_stura(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st_tl(o->in1, o->in2, MMU_REAL_IDX, s->insn->data); + + if (s->base.tb->flags & FLAG_MASK_PER) { + update_psw_addr(s); + gen_helper_per_store_real(cpu_env); + } + return DISAS_NEXT; +} +#endif + +static DisasJumpType op_stfle(DisasContext *s, DisasOps *o) +{ + gen_helper_stfle(cc_op, cpu_env, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_st8(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st8(o->in1, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_st16(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st16(o->in1, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_st32(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st32(o->in1, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_st64(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st64(o->in1, o->in2, get_mem_index(s)); + return DISAS_NEXT; +} + +static DisasJumpType op_stam(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + gen_helper_stam(cpu_env, r1, o->in2, r3); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + return DISAS_NEXT; +} + +static DisasJumpType op_stcm(DisasContext *s, DisasOps *o) +{ + int m3 = get_field(s, m3); + int pos, base = s->insn->data; + TCGv_i64 tmp = tcg_temp_new_i64(); + + pos = base + ctz32(m3) * 8; + switch (m3) { + case 0xf: + /* Effectively a 32-bit store. */ + tcg_gen_shri_i64(tmp, o->in1, pos); + tcg_gen_qemu_st32(tmp, o->in2, get_mem_index(s)); + break; + + case 0xc: + case 0x6: + case 0x3: + /* Effectively a 16-bit store. */ + tcg_gen_shri_i64(tmp, o->in1, pos); + tcg_gen_qemu_st16(tmp, o->in2, get_mem_index(s)); + break; + + case 0x8: + case 0x4: + case 0x2: + case 0x1: + /* Effectively an 8-bit store. */ + tcg_gen_shri_i64(tmp, o->in1, pos); + tcg_gen_qemu_st8(tmp, o->in2, get_mem_index(s)); + break; + + default: + /* This is going to be a sequence of shifts and stores. 
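+ The mirror image of ICM above: walk the mask from its msb, storing one byte from successively lower positions of r1 for every set bit.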
*/ + pos = base + 32 - 8; + while (m3) { + if (m3 & 0x8) { + tcg_gen_shri_i64(tmp, o->in1, pos); + tcg_gen_qemu_st8(tmp, o->in2, get_mem_index(s)); + tcg_gen_addi_i64(o->in2, o->in2, 1); + } + m3 = (m3 << 1) & 0xf; + pos -= 8; + } + break; + } + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_stm(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + int size = s->insn->data; + TCGv_i64 tsize = tcg_const_i64(size); + + while (1) { + if (size == 8) { + tcg_gen_qemu_st64(regs[r1], o->in2, get_mem_index(s)); + } else { + tcg_gen_qemu_st32(regs[r1], o->in2, get_mem_index(s)); + } + if (r1 == r3) { + break; + } + tcg_gen_add_i64(o->in2, o->in2, tsize); + r1 = (r1 + 1) & 15; + } + + tcg_temp_free_i64(tsize); + return DISAS_NEXT; +} + +static DisasJumpType op_stmh(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + int r3 = get_field(s, r3); + TCGv_i64 t = tcg_temp_new_i64(); + TCGv_i64 t4 = tcg_const_i64(4); + TCGv_i64 t32 = tcg_const_i64(32); + + while (1) { + tcg_gen_shl_i64(t, regs[r1], t32); + tcg_gen_qemu_st32(t, o->in2, get_mem_index(s)); + if (r1 == r3) { + break; + } + tcg_gen_add_i64(o->in2, o->in2, t4); + r1 = (r1 + 1) & 15; + } + + tcg_temp_free_i64(t); + tcg_temp_free_i64(t4); + tcg_temp_free_i64(t32); + return DISAS_NEXT; +} + +static DisasJumpType op_stpq(DisasContext *s, DisasOps *o) +{ + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + gen_helper_stpq(cpu_env, o->in2, o->out2, o->out); + } else if (HAVE_ATOMIC128) { + gen_helper_stpq_parallel(cpu_env, o->in2, o->out2, o->out); + } else { + gen_helper_exit_atomic(cpu_env); + return DISAS_NORETURN; + } + return DISAS_NEXT; +} + +static DisasJumpType op_srst(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_srst(cpu_env, r1, r2); + + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_srstu(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_srstu(cpu_env, r1, r2); + + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_sub(DisasContext *s, DisasOps *o) +{ + tcg_gen_sub_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_subu64(DisasContext *s, DisasOps *o) +{ + tcg_gen_movi_i64(cc_src, 0); + tcg_gen_sub2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src); + return DISAS_NEXT; +} + +/* Compute borrow (0, -1) into cc_src. */ +static void compute_borrow(DisasContext *s) +{ + switch (s->cc_op) { + case CC_OP_SUBU: + /* The borrow value is already in cc_src (0,-1). */ + break; + default: + gen_op_calc_cc(s); + /* fall through */ + case CC_OP_STATIC: + /* The carry flag is the msb of CC; compute into cc_src. */ + tcg_gen_extu_i32_i64(cc_src, cc_op); + tcg_gen_shri_i64(cc_src, cc_src, 1); + /* fall through */ + case CC_OP_ADDU: + /* Convert carry (1,0) to borrow (0,-1). */ + tcg_gen_subi_i64(cc_src, cc_src, 1); + break; + } +} + +static DisasJumpType op_subb32(DisasContext *s, DisasOps *o) +{ + compute_borrow(s); + + /* Borrow is {0, -1}, so add to subtract. 
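+       (in1 - in2 - borrow_in) == (in1 + borrow) - in2, since the borrow
+       computed above is either 0 or -1.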
*/ + tcg_gen_add_i64(o->out, o->in1, cc_src); + tcg_gen_sub_i64(o->out, o->out, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_subb64(DisasContext *s, DisasOps *o) +{ + compute_borrow(s); + + /* + * Borrow is {0, -1}, so add to subtract; replicate the + * borrow input to produce 128-bit -1 for the addition. + */ + TCGv_i64 zero = tcg_const_i64(0); + tcg_gen_add2_i64(o->out, cc_src, o->in1, zero, cc_src, cc_src); + tcg_gen_sub2_i64(o->out, cc_src, o->out, cc_src, o->in2, zero); + tcg_temp_free_i64(zero); + + return DISAS_NEXT; +} + +static DisasJumpType op_svc(DisasContext *s, DisasOps *o) +{ + TCGv_i32 t; + + update_psw_addr(s); + update_cc_op(s); + + t = tcg_const_i32(get_field(s, i1) & 0xff); + tcg_gen_st_i32(t, cpu_env, offsetof(CPUS390XState, int_svc_code)); + tcg_temp_free_i32(t); + + t = tcg_const_i32(s->ilen); + tcg_gen_st_i32(t, cpu_env, offsetof(CPUS390XState, int_svc_ilen)); + tcg_temp_free_i32(t); + + gen_exception(EXCP_SVC); + return DISAS_NORETURN; +} + +static DisasJumpType op_tam(DisasContext *s, DisasOps *o) +{ + int cc = 0; + + cc |= (s->base.tb->flags & FLAG_MASK_64) ? 2 : 0; + cc |= (s->base.tb->flags & FLAG_MASK_32) ? 1 : 0; + gen_op_movi_cc(s, cc); + return DISAS_NEXT; +} + +static DisasJumpType op_tceb(DisasContext *s, DisasOps *o) +{ + gen_helper_tceb(cc_op, cpu_env, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tcdb(DisasContext *s, DisasOps *o) +{ + gen_helper_tcdb(cc_op, cpu_env, o->in1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tcxb(DisasContext *s, DisasOps *o) +{ + gen_helper_tcxb(cc_op, cpu_env, o->out, o->out2, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY + +static DisasJumpType op_testblock(DisasContext *s, DisasOps *o) +{ + gen_helper_testblock(cc_op, cpu_env, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tprot(DisasContext *s, DisasOps *o) +{ + gen_helper_tprot(cc_op, cpu_env, o->addr1, o->in2); + set_cc_static(s); + return DISAS_NEXT; +} + +#endif + +static DisasJumpType op_tp(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l1 = tcg_const_i32(get_field(s, l1) + 1); + gen_helper_tp(cc_op, cpu_env, o->addr1, l1); + tcg_temp_free_i32(l1); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tr(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_tr(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_tre(DisasContext *s, DisasOps *o) +{ + gen_helper_tre(o->out, cpu_env, o->out, o->out2, o->in2); + return_low128(o->out2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_trt(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_trt(cc_op, cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_trtr(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_trtr(cc_op, cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_trXX(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + TCGv_i32 sizes = tcg_const_i32(s->insn->opc & 3); + TCGv_i32 tst = tcg_temp_new_i32(); + int m3 = get_field(s, m3); + + if (!s390_has_feat(S390_FEAT_ETF2_ENH)) { + m3 = 0; + } + if (m3 & 
1) { + tcg_gen_movi_i32(tst, -1); + } else { + tcg_gen_extrl_i64_i32(tst, regs[0]); + if (s->insn->opc & 3) { + tcg_gen_ext8u_i32(tst, tst); + } else { + tcg_gen_ext16u_i32(tst, tst); + } + } + gen_helper_trXX(cc_op, cpu_env, r1, r2, tst, sizes); + + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r2); + tcg_temp_free_i32(sizes); + tcg_temp_free_i32(tst); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_ts(DisasContext *s, DisasOps *o) +{ + TCGv_i32 t1 = tcg_const_i32(0xff); + tcg_gen_atomic_xchg_i32(t1, o->in2, t1, get_mem_index(s), MO_UB); + tcg_gen_extract_i32(cc_op, t1, 7, 1); + tcg_temp_free_i32(t1); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_unpk(DisasContext *s, DisasOps *o) +{ + TCGv_i32 l = tcg_const_i32(get_field(s, l1)); + gen_helper_unpk(cpu_env, l, o->addr1, o->in2); + tcg_temp_free_i32(l); + return DISAS_NEXT; +} + +static DisasJumpType op_unpka(DisasContext *s, DisasOps *o) +{ + int l1 = get_field(s, l1) + 1; + TCGv_i32 l; + + /* The length must not exceed 32 bytes. */ + if (l1 > 32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + l = tcg_const_i32(l1); + gen_helper_unpka(cc_op, cpu_env, o->addr1, l, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_unpku(DisasContext *s, DisasOps *o) +{ + int l1 = get_field(s, l1) + 1; + TCGv_i32 l; + + /* The length must be even and should not exceed 64 bytes. */ + if ((l1 & 1) || (l1 > 64)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + l = tcg_const_i32(l1); + gen_helper_unpku(cc_op, cpu_env, o->addr1, l, o->in2); + tcg_temp_free_i32(l); + set_cc_static(s); + return DISAS_NEXT; +} + + +static DisasJumpType op_xc(DisasContext *s, DisasOps *o) +{ + int d1 = get_field(s, d1); + int d2 = get_field(s, d2); + int b1 = get_field(s, b1); + int b2 = get_field(s, b2); + int l = get_field(s, l1); + TCGv_i32 t32; + + o->addr1 = get_address(s, 0, b1, d1); + + /* If the addresses are identical, this is a store/memset of zero. */ + if (b1 == b2 && d1 == d2 && (l + 1) <= 32) { + o->in2 = tcg_const_i64(0); + + l++; + while (l >= 8) { + tcg_gen_qemu_st64(o->in2, o->addr1, get_mem_index(s)); + l -= 8; + if (l > 0) { + tcg_gen_addi_i64(o->addr1, o->addr1, 8); + } + } + if (l >= 4) { + tcg_gen_qemu_st32(o->in2, o->addr1, get_mem_index(s)); + l -= 4; + if (l > 0) { + tcg_gen_addi_i64(o->addr1, o->addr1, 4); + } + } + if (l >= 2) { + tcg_gen_qemu_st16(o->in2, o->addr1, get_mem_index(s)); + l -= 2; + if (l > 0) { + tcg_gen_addi_i64(o->addr1, o->addr1, 2); + } + } + if (l) { + tcg_gen_qemu_st8(o->in2, o->addr1, get_mem_index(s)); + } + gen_op_movi_cc(s, 0); + return DISAS_NEXT; + } + + /* But in general we'll defer to a helper. */ + o->in2 = get_address(s, 0, b2, d2); + t32 = tcg_const_i32(l); + gen_helper_xc(cc_op, cpu_env, t32, o->addr1, o->in2); + tcg_temp_free_i32(t32); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_xor(DisasContext *s, DisasOps *o) +{ + tcg_gen_xor_i64(o->out, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_xori(DisasContext *s, DisasOps *o) +{ + int shift = s->insn->data & 0xff; + int size = s->insn->data >> 8; + uint64_t mask = ((1ull << size) - 1) << shift; + + assert(!o->g_in2); + tcg_gen_shli_i64(o->in2, o->in2, shift); + tcg_gen_xor_i64(o->out, o->in1, o->in2); + + /* Produce the CC from only the bits manipulated. 
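+       insn->data packs (size << 8) | shift, so e.g. a 32-bit low-word
+       variant (size 32, shift 0) yields mask 0xffffffff.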
*/ + tcg_gen_andi_i64(cc_dst, o->out, mask); + set_cc_nz_u64(s, cc_dst); + return DISAS_NEXT; +} + +static DisasJumpType op_xi(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + + if (!s390_has_feat(S390_FEAT_INTERLOCKED_ACCESS_2)) { + tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data); + } else { + /* Perform the atomic operation in memory. */ + tcg_gen_atomic_fetch_xor_i64(o->in1, o->addr1, o->in2, get_mem_index(s), + s->insn->data); + } + + /* Recompute also for atomic case: needed for setting CC. */ + tcg_gen_xor_i64(o->out, o->in1, o->in2); + + if (!s390_has_feat(S390_FEAT_INTERLOCKED_ACCESS_2)) { + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data); + } + return DISAS_NEXT; +} + +static DisasJumpType op_zero(DisasContext *s, DisasOps *o) +{ + o->out = tcg_const_i64(0); + return DISAS_NEXT; +} + +static DisasJumpType op_zero2(DisasContext *s, DisasOps *o) +{ + o->out = tcg_const_i64(0); + o->out2 = o->out; + o->g_out2 = true; + return DISAS_NEXT; +} + +#ifndef CONFIG_USER_ONLY +static DisasJumpType op_clp(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_clp(cpu_env, r2); + tcg_temp_free_i32(r2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_pcilg(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_pcilg(cpu_env, r1, r2); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_pcistg(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_pcistg(cpu_env, r1, r2); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_stpcifc(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 ar = tcg_const_i32(get_field(s, b2)); + + gen_helper_stpcifc(cpu_env, r1, o->addr1, ar); + tcg_temp_free_i32(ar); + tcg_temp_free_i32(r1); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_sic(DisasContext *s, DisasOps *o) +{ + gen_helper_sic(cpu_env, o->in1, o->in2); + return DISAS_NEXT; +} + +static DisasJumpType op_rpcit(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r2 = tcg_const_i32(get_field(s, r2)); + + gen_helper_rpcit(cpu_env, r1, r2); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r2); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_pcistb(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 r3 = tcg_const_i32(get_field(s, r3)); + TCGv_i32 ar = tcg_const_i32(get_field(s, b2)); + + gen_helper_pcistb(cpu_env, r1, r3, o->addr1, ar); + tcg_temp_free_i32(ar); + tcg_temp_free_i32(r1); + tcg_temp_free_i32(r3); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_mpcifc(DisasContext *s, DisasOps *o) +{ + TCGv_i32 r1 = tcg_const_i32(get_field(s, r1)); + TCGv_i32 ar = tcg_const_i32(get_field(s, b2)); + + gen_helper_mpcifc(cpu_env, r1, o->addr1, ar); + tcg_temp_free_i32(ar); + tcg_temp_free_i32(r1); + set_cc_static(s); + return DISAS_NEXT; +} +#endif + +#include "translate_vx.c.inc" + +/* ====================================================================== */ +/* The "Cc OUTput" generators. 
Given the generated output (and in some cases + the original inputs), update the various cc data structures in order to + be able to compute the new condition code. */ + +static void cout_abs32(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_ABS_32, o->out); +} + +static void cout_abs64(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_ABS_64, o->out); +} + +static void cout_adds32(DisasContext *s, DisasOps *o) +{ + gen_op_update3_cc_i64(s, CC_OP_ADD_32, o->in1, o->in2, o->out); +} + +static void cout_adds64(DisasContext *s, DisasOps *o) +{ + gen_op_update3_cc_i64(s, CC_OP_ADD_64, o->in1, o->in2, o->out); +} + +static void cout_addu32(DisasContext *s, DisasOps *o) +{ + tcg_gen_shri_i64(cc_src, o->out, 32); + tcg_gen_ext32u_i64(cc_dst, o->out); + gen_op_update2_cc_i64(s, CC_OP_ADDU, cc_src, cc_dst); +} + +static void cout_addu64(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_ADDU, cc_src, o->out); +} + +static void cout_cmps32(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_LTGT_32, o->in1, o->in2); +} + +static void cout_cmps64(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_LTGT_64, o->in1, o->in2); +} + +static void cout_cmpu32(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_LTUGTU_32, o->in1, o->in2); +} + +static void cout_cmpu64(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_LTUGTU_64, o->in1, o->in2); +} + +static void cout_f32(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_NZ_F32, o->out); +} + +static void cout_f64(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_NZ_F64, o->out); +} + +static void cout_f128(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_NZ_F128, o->out, o->out2); +} + +static void cout_nabs32(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_NABS_32, o->out); +} + +static void cout_nabs64(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_NABS_64, o->out); +} + +static void cout_neg32(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_COMP_32, o->out); +} + +static void cout_neg64(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_COMP_64, o->out); +} + +static void cout_nz32(DisasContext *s, DisasOps *o) +{ + tcg_gen_ext32u_i64(cc_dst, o->out); + gen_op_update1_cc_i64(s, CC_OP_NZ, cc_dst); +} + +static void cout_nz64(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_NZ, o->out); +} + +static void cout_s32(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_LTGT0_32, o->out); +} + +static void cout_s64(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_LTGT0_64, o->out); +} + +static void cout_subs32(DisasContext *s, DisasOps *o) +{ + gen_op_update3_cc_i64(s, CC_OP_SUB_32, o->in1, o->in2, o->out); +} + +static void cout_subs64(DisasContext *s, DisasOps *o) +{ + gen_op_update3_cc_i64(s, CC_OP_SUB_64, o->in1, o->in2, o->out); +} + +static void cout_subu32(DisasContext *s, DisasOps *o) +{ + tcg_gen_sari_i64(cc_src, o->out, 32); + tcg_gen_ext32u_i64(cc_dst, o->out); + gen_op_update2_cc_i64(s, CC_OP_SUBU, cc_src, cc_dst); +} + +static void cout_subu64(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_SUBU, cc_src, o->out); +} + +static void cout_tm32(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_TM_32, o->in1, o->in2); +} + +static void cout_tm64(DisasContext *s, DisasOps *o) +{ + gen_op_update2_cc_i64(s, CC_OP_TM_64, o->in1, o->in2); +} + +static void 
cout_muls32(DisasContext *s, DisasOps *o) +{ + gen_op_update1_cc_i64(s, CC_OP_MULS_32, o->out); +} + +static void cout_muls64(DisasContext *s, DisasOps *o) +{ + /* out contains "high" part, out2 contains "low" part of 128 bit result */ + gen_op_update2_cc_i64(s, CC_OP_MULS_64, o->out, o->out2); +} + +/* ====================================================================== */ +/* The "PREParation" generators. These initialize the DisasOps.OUT fields + with the TCG register to which we will write. Used in combination with + the "wout" generators, in some cases we need a new temporary, and in + some cases we can write to a TCG global. */ + +static void prep_new(DisasContext *s, DisasOps *o) +{ + o->out = tcg_temp_new_i64(); +} +#define SPEC_prep_new 0 + +static void prep_new_P(DisasContext *s, DisasOps *o) +{ + o->out = tcg_temp_new_i64(); + o->out2 = tcg_temp_new_i64(); +} +#define SPEC_prep_new_P 0 + +static void prep_r1(DisasContext *s, DisasOps *o) +{ + o->out = regs[get_field(s, r1)]; + o->g_out = true; +} +#define SPEC_prep_r1 0 + +static void prep_r1_P(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + o->out = regs[r1]; + o->out2 = regs[r1 + 1]; + o->g_out = o->g_out2 = true; +} +#define SPEC_prep_r1_P SPEC_r1_even + +/* Whenever we need x1 in addition to other inputs, we'll load it to out/out2 */ +static void prep_x1(DisasContext *s, DisasOps *o) +{ + o->out = load_freg(get_field(s, r1)); + o->out2 = load_freg(get_field(s, r1) + 2); +} +#define SPEC_prep_x1 SPEC_r1_f128 + +/* ====================================================================== */ +/* The "Write OUTput" generators. These generally perform some non-trivial + copy of data to TCG globals, or to main memory. The trivial cases are + generally handled by having a "prep" generator install the TCG global + as the destination of the operation. 
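+   Note that translate_one() invokes a wout generator only when the op
+   generator did not return DISAS_NORETURN.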
*/ + +static void wout_r1(DisasContext *s, DisasOps *o) +{ + store_reg(get_field(s, r1), o->out); +} +#define SPEC_wout_r1 0 + +static void wout_out2_r1(DisasContext *s, DisasOps *o) +{ + store_reg(get_field(s, r1), o->out2); +} +#define SPEC_wout_out2_r1 0 + +static void wout_r1_8(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + tcg_gen_deposit_i64(regs[r1], regs[r1], o->out, 0, 8); +} +#define SPEC_wout_r1_8 0 + +static void wout_r1_16(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + tcg_gen_deposit_i64(regs[r1], regs[r1], o->out, 0, 16); +} +#define SPEC_wout_r1_16 0 + +static void wout_r1_32(DisasContext *s, DisasOps *o) +{ + store_reg32_i64(get_field(s, r1), o->out); +} +#define SPEC_wout_r1_32 0 + +static void wout_r1_32h(DisasContext *s, DisasOps *o) +{ + store_reg32h_i64(get_field(s, r1), o->out); +} +#define SPEC_wout_r1_32h 0 + +static void wout_r1_P32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + store_reg32_i64(r1, o->out); + store_reg32_i64(r1 + 1, o->out2); +} +#define SPEC_wout_r1_P32 SPEC_r1_even + +static void wout_r1_D32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + store_reg32_i64(r1 + 1, o->out); + tcg_gen_shri_i64(o->out, o->out, 32); + store_reg32_i64(r1, o->out); +} +#define SPEC_wout_r1_D32 SPEC_r1_even + +static void wout_r3_P32(DisasContext *s, DisasOps *o) +{ + int r3 = get_field(s, r3); + store_reg32_i64(r3, o->out); + store_reg32_i64(r3 + 1, o->out2); +} +#define SPEC_wout_r3_P32 SPEC_r3_even + +static void wout_r3_P64(DisasContext *s, DisasOps *o) +{ + int r3 = get_field(s, r3); + store_reg(r3, o->out); + store_reg(r3 + 1, o->out2); +} +#define SPEC_wout_r3_P64 SPEC_r3_even + +static void wout_e1(DisasContext *s, DisasOps *o) +{ + store_freg32_i64(get_field(s, r1), o->out); +} +#define SPEC_wout_e1 0 + +static void wout_f1(DisasContext *s, DisasOps *o) +{ + store_freg(get_field(s, r1), o->out); +} +#define SPEC_wout_f1 0 + +static void wout_x1(DisasContext *s, DisasOps *o) +{ + int f1 = get_field(s, r1); + store_freg(f1, o->out); + store_freg(f1 + 2, o->out2); +} +#define SPEC_wout_x1 SPEC_r1_f128 + +static void wout_cond_r1r2_32(DisasContext *s, DisasOps *o) +{ + if (get_field(s, r1) != get_field(s, r2)) { + store_reg32_i64(get_field(s, r1), o->out); + } +} +#define SPEC_wout_cond_r1r2_32 0 + +static void wout_cond_e1e2(DisasContext *s, DisasOps *o) +{ + if (get_field(s, r1) != get_field(s, r2)) { + store_freg32_i64(get_field(s, r1), o->out); + } +} +#define SPEC_wout_cond_e1e2 0 + +static void wout_m1_8(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st8(o->out, o->addr1, get_mem_index(s)); +} +#define SPEC_wout_m1_8 0 + +static void wout_m1_16(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st16(o->out, o->addr1, get_mem_index(s)); +} +#define SPEC_wout_m1_16 0 + +#ifndef CONFIG_USER_ONLY +static void wout_m1_16a(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), MO_TEUW | MO_ALIGN); +} +#define SPEC_wout_m1_16a 0 +#endif + +static void wout_m1_32(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st32(o->out, o->addr1, get_mem_index(s)); +} +#define SPEC_wout_m1_32 0 + +#ifndef CONFIG_USER_ONLY +static void wout_m1_32a(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), MO_TEUL | MO_ALIGN); +} +#define SPEC_wout_m1_32a 0 +#endif + +static void wout_m1_64(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st64(o->out, o->addr1, get_mem_index(s)); +} +#define SPEC_wout_m1_64 0 + +#ifndef CONFIG_USER_ONLY +static void 
wout_m1_64a(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st_i64(o->out, o->addr1, get_mem_index(s), MO_TEQ | MO_ALIGN); +} +#define SPEC_wout_m1_64a 0 +#endif + +static void wout_m2_32(DisasContext *s, DisasOps *o) +{ + tcg_gen_qemu_st32(o->out, o->in2, get_mem_index(s)); +} +#define SPEC_wout_m2_32 0 + +static void wout_in2_r1(DisasContext *s, DisasOps *o) +{ + store_reg(get_field(s, r1), o->in2); +} +#define SPEC_wout_in2_r1 0 + +static void wout_in2_r1_32(DisasContext *s, DisasOps *o) +{ + store_reg32_i64(get_field(s, r1), o->in2); +} +#define SPEC_wout_in2_r1_32 0 + +/* ====================================================================== */ +/* The "INput 1" generators. These load the first operand to an insn. */ + +static void in1_r1(DisasContext *s, DisasOps *o) +{ + o->in1 = load_reg(get_field(s, r1)); +} +#define SPEC_in1_r1 0 + +static void in1_r1_o(DisasContext *s, DisasOps *o) +{ + o->in1 = regs[get_field(s, r1)]; + o->g_in1 = true; +} +#define SPEC_in1_r1_o 0 + +static void in1_r1_32s(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(o->in1, regs[get_field(s, r1)]); +} +#define SPEC_in1_r1_32s 0 + +static void in1_r1_32u(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in1, regs[get_field(s, r1)]); +} +#define SPEC_in1_r1_32u 0 + +static void in1_r1_sr32(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_shri_i64(o->in1, regs[get_field(s, r1)], 32); +} +#define SPEC_in1_r1_sr32 0 + +static void in1_r1p1(DisasContext *s, DisasOps *o) +{ + o->in1 = load_reg(get_field(s, r1) + 1); +} +#define SPEC_in1_r1p1 SPEC_r1_even + +static void in1_r1p1_o(DisasContext *s, DisasOps *o) +{ + o->in1 = regs[get_field(s, r1) + 1]; + o->g_in1 = true; +} +#define SPEC_in1_r1p1_o SPEC_r1_even + +static void in1_r1p1_32s(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(o->in1, regs[get_field(s, r1) + 1]); +} +#define SPEC_in1_r1p1_32s SPEC_r1_even + +static void in1_r1p1_32u(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in1, regs[get_field(s, r1) + 1]); +} +#define SPEC_in1_r1p1_32u SPEC_r1_even + +static void in1_r1_D32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + o->in1 = tcg_temp_new_i64(); + tcg_gen_concat32_i64(o->in1, regs[r1 + 1], regs[r1]); +} +#define SPEC_in1_r1_D32 SPEC_r1_even + +static void in1_r2(DisasContext *s, DisasOps *o) +{ + o->in1 = load_reg(get_field(s, r2)); +} +#define SPEC_in1_r2 0 + +static void in1_r2_sr32(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_shri_i64(o->in1, regs[get_field(s, r2)], 32); +} +#define SPEC_in1_r2_sr32 0 + +static void in1_r2_32u(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in1, regs[get_field(s, r2)]); +} +#define SPEC_in1_r2_32u 0 + +static void in1_r3(DisasContext *s, DisasOps *o) +{ + o->in1 = load_reg(get_field(s, r3)); +} +#define SPEC_in1_r3 0 + +static void in1_r3_o(DisasContext *s, DisasOps *o) +{ + o->in1 = regs[get_field(s, r3)]; + o->g_in1 = true; +} +#define SPEC_in1_r3_o 0 + +static void in1_r3_32s(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(o->in1, regs[get_field(s, r3)]); +} +#define SPEC_in1_r3_32s 0 + +static void in1_r3_32u(DisasContext *s, DisasOps *o) +{ + o->in1 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in1, regs[get_field(s, r3)]); +} +#define SPEC_in1_r3_32u 0 + +static void in1_r3_D32(DisasContext *s, DisasOps *o) +{ 
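+    /* D32: read an even/odd 32-bit register pair as one 64-bit value. */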
+ int r3 = get_field(s, r3); + o->in1 = tcg_temp_new_i64(); + tcg_gen_concat32_i64(o->in1, regs[r3 + 1], regs[r3]); +} +#define SPEC_in1_r3_D32 SPEC_r3_even + +static void in1_e1(DisasContext *s, DisasOps *o) +{ + o->in1 = load_freg32_i64(get_field(s, r1)); +} +#define SPEC_in1_e1 0 + +static void in1_f1(DisasContext *s, DisasOps *o) +{ + o->in1 = load_freg(get_field(s, r1)); +} +#define SPEC_in1_f1 0 + +/* Load the high double word of an extended (128-bit) format FP number */ +static void in1_x2h(DisasContext *s, DisasOps *o) +{ + o->in1 = load_freg(get_field(s, r2)); +} +#define SPEC_in1_x2h SPEC_r2_f128 + +static void in1_f3(DisasContext *s, DisasOps *o) +{ + o->in1 = load_freg(get_field(s, r3)); +} +#define SPEC_in1_f3 0 + +static void in1_la1(DisasContext *s, DisasOps *o) +{ + o->addr1 = get_address(s, 0, get_field(s, b1), get_field(s, d1)); +} +#define SPEC_in1_la1 0 + +static void in1_la2(DisasContext *s, DisasOps *o) +{ + int x2 = have_field(s, x2) ? get_field(s, x2) : 0; + o->addr1 = get_address(s, x2, get_field(s, b2), get_field(s, d2)); +} +#define SPEC_in1_la2 0 + +static void in1_m1_8u(DisasContext *s, DisasOps *o) +{ + in1_la1(s, o); + o->in1 = tcg_temp_new_i64(); + tcg_gen_qemu_ld8u(o->in1, o->addr1, get_mem_index(s)); +} +#define SPEC_in1_m1_8u 0 + +static void in1_m1_16s(DisasContext *s, DisasOps *o) +{ + in1_la1(s, o); + o->in1 = tcg_temp_new_i64(); + tcg_gen_qemu_ld16s(o->in1, o->addr1, get_mem_index(s)); +} +#define SPEC_in1_m1_16s 0 + +static void in1_m1_16u(DisasContext *s, DisasOps *o) +{ + in1_la1(s, o); + o->in1 = tcg_temp_new_i64(); + tcg_gen_qemu_ld16u(o->in1, o->addr1, get_mem_index(s)); +} +#define SPEC_in1_m1_16u 0 + +static void in1_m1_32s(DisasContext *s, DisasOps *o) +{ + in1_la1(s, o); + o->in1 = tcg_temp_new_i64(); + tcg_gen_qemu_ld32s(o->in1, o->addr1, get_mem_index(s)); +} +#define SPEC_in1_m1_32s 0 + +static void in1_m1_32u(DisasContext *s, DisasOps *o) +{ + in1_la1(s, o); + o->in1 = tcg_temp_new_i64(); + tcg_gen_qemu_ld32u(o->in1, o->addr1, get_mem_index(s)); +} +#define SPEC_in1_m1_32u 0 + +static void in1_m1_64(DisasContext *s, DisasOps *o) +{ + in1_la1(s, o); + o->in1 = tcg_temp_new_i64(); + tcg_gen_qemu_ld64(o->in1, o->addr1, get_mem_index(s)); +} +#define SPEC_in1_m1_64 0 + +/* ====================================================================== */ +/* The "INput 2" generators. These load the second operand to an insn. 
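+   Generators with an "_o" suffix alias the register global directly and
+   set g_in2, so translate_one() does not free the value.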
*/ + +static void in2_r1_o(DisasContext *s, DisasOps *o) +{ + o->in2 = regs[get_field(s, r1)]; + o->g_in2 = true; +} +#define SPEC_in2_r1_o 0 + +static void in2_r1_16u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext16u_i64(o->in2, regs[get_field(s, r1)]); +} +#define SPEC_in2_r1_16u 0 + +static void in2_r1_32u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in2, regs[get_field(s, r1)]); +} +#define SPEC_in2_r1_32u 0 + +static void in2_r1_D32(DisasContext *s, DisasOps *o) +{ + int r1 = get_field(s, r1); + o->in2 = tcg_temp_new_i64(); + tcg_gen_concat32_i64(o->in2, regs[r1 + 1], regs[r1]); +} +#define SPEC_in2_r1_D32 SPEC_r1_even + +static void in2_r2(DisasContext *s, DisasOps *o) +{ + o->in2 = load_reg(get_field(s, r2)); +} +#define SPEC_in2_r2 0 + +static void in2_r2_o(DisasContext *s, DisasOps *o) +{ + o->in2 = regs[get_field(s, r2)]; + o->g_in2 = true; +} +#define SPEC_in2_r2_o 0 + +static void in2_r2_nz(DisasContext *s, DisasOps *o) +{ + int r2 = get_field(s, r2); + if (r2 != 0) { + o->in2 = load_reg(r2); + } +} +#define SPEC_in2_r2_nz 0 + +static void in2_r2_8s(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext8s_i64(o->in2, regs[get_field(s, r2)]); +} +#define SPEC_in2_r2_8s 0 + +static void in2_r2_8u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext8u_i64(o->in2, regs[get_field(s, r2)]); +} +#define SPEC_in2_r2_8u 0 + +static void in2_r2_16s(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext16s_i64(o->in2, regs[get_field(s, r2)]); +} +#define SPEC_in2_r2_16s 0 + +static void in2_r2_16u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext16u_i64(o->in2, regs[get_field(s, r2)]); +} +#define SPEC_in2_r2_16u 0 + +static void in2_r3(DisasContext *s, DisasOps *o) +{ + o->in2 = load_reg(get_field(s, r3)); +} +#define SPEC_in2_r3 0 + +static void in2_r3_sr32(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_shri_i64(o->in2, regs[get_field(s, r3)], 32); +} +#define SPEC_in2_r3_sr32 0 + +static void in2_r3_32u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in2, regs[get_field(s, r3)]); +} +#define SPEC_in2_r3_32u 0 + +static void in2_r2_32s(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(o->in2, regs[get_field(s, r2)]); +} +#define SPEC_in2_r2_32s 0 + +static void in2_r2_32u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(o->in2, regs[get_field(s, r2)]); +} +#define SPEC_in2_r2_32u 0 + +static void in2_r2_sr32(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_temp_new_i64(); + tcg_gen_shri_i64(o->in2, regs[get_field(s, r2)], 32); +} +#define SPEC_in2_r2_sr32 0 + +static void in2_e2(DisasContext *s, DisasOps *o) +{ + o->in2 = load_freg32_i64(get_field(s, r2)); +} +#define SPEC_in2_e2 0 + +static void in2_f2(DisasContext *s, DisasOps *o) +{ + o->in2 = load_freg(get_field(s, r2)); +} +#define SPEC_in2_f2 0 + +/* Load the low double word of an extended (128-bit) format FP number */ +static void in2_x2l(DisasContext *s, DisasOps *o) +{ + o->in2 = load_freg(get_field(s, r2) + 2); +} +#define SPEC_in2_x2l SPEC_r2_f128 + +static void in2_ra2(DisasContext *s, DisasOps *o) +{ + int r2 = get_field(s, r2); + + /* Note: *don't* treat !r2 as 0, use the reg value. 
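+       Register 0 still supplies its contents; gen_addi_and_wrap_i64()
+       with displacement 0 merely applies the addressing-mode wrap.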
*/ + o->in2 = tcg_temp_new_i64(); + gen_addi_and_wrap_i64(s, o->in2, regs[r2], 0); +} +#define SPEC_in2_ra2 0 + +static void in2_a2(DisasContext *s, DisasOps *o) +{ + int x2 = have_field(s, x2) ? get_field(s, x2) : 0; + o->in2 = get_address(s, x2, get_field(s, b2), get_field(s, d2)); +} +#define SPEC_in2_a2 0 + +static void in2_ri2(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_const_i64(s->base.pc_next + (int64_t)get_field(s, i2) * 2); +} +#define SPEC_in2_ri2 0 + +static void in2_sh32(DisasContext *s, DisasOps *o) +{ + help_l2_shift(s, o, 31); +} +#define SPEC_in2_sh32 0 + +static void in2_sh64(DisasContext *s, DisasOps *o) +{ + help_l2_shift(s, o, 63); +} +#define SPEC_in2_sh64 0 + +static void in2_m2_8u(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld8u(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_m2_8u 0 + +static void in2_m2_16s(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld16s(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_m2_16s 0 + +static void in2_m2_16u(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld16u(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_m2_16u 0 + +static void in2_m2_32s(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld32s(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_m2_32s 0 + +static void in2_m2_32u(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld32u(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_m2_32u 0 + +#ifndef CONFIG_USER_ONLY +static void in2_m2_32ua(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld_tl(o->in2, o->in2, get_mem_index(s), MO_TEUL | MO_ALIGN); +} +#define SPEC_in2_m2_32ua 0 +#endif + +static void in2_m2_64(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld64(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_m2_64 0 + +static void in2_m2_64w(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld64(o->in2, o->in2, get_mem_index(s)); + gen_addi_and_wrap_i64(s, o->in2, o->in2, 0); +} +#define SPEC_in2_m2_64w 0 + +#ifndef CONFIG_USER_ONLY +static void in2_m2_64a(DisasContext *s, DisasOps *o) +{ + in2_a2(s, o); + tcg_gen_qemu_ld_i64(o->in2, o->in2, get_mem_index(s), MO_TEQ | MO_ALIGN); +} +#define SPEC_in2_m2_64a 0 +#endif + +static void in2_mri2_16u(DisasContext *s, DisasOps *o) +{ + in2_ri2(s, o); + tcg_gen_qemu_ld16u(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_mri2_16u 0 + +static void in2_mri2_32s(DisasContext *s, DisasOps *o) +{ + in2_ri2(s, o); + tcg_gen_qemu_ld32s(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_mri2_32s 0 + +static void in2_mri2_32u(DisasContext *s, DisasOps *o) +{ + in2_ri2(s, o); + tcg_gen_qemu_ld32u(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_mri2_32u 0 + +static void in2_mri2_64(DisasContext *s, DisasOps *o) +{ + in2_ri2(s, o); + tcg_gen_qemu_ld64(o->in2, o->in2, get_mem_index(s)); +} +#define SPEC_in2_mri2_64 0 + +static void in2_i2(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_const_i64(get_field(s, i2)); +} +#define SPEC_in2_i2 0 + +static void in2_i2_8u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_const_i64((uint8_t)get_field(s, i2)); +} +#define SPEC_in2_i2_8u 0 + +static void in2_i2_16u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_const_i64((uint16_t)get_field(s, i2)); +} +#define SPEC_in2_i2_16u 0 + +static void in2_i2_32u(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_const_i64((uint32_t)get_field(s, i2)); +} +#define SPEC_in2_i2_32u 0 + +static void in2_i2_16u_shl(DisasContext *s, 
DisasOps *o) +{ + uint64_t i2 = (uint16_t)get_field(s, i2); + o->in2 = tcg_const_i64(i2 << s->insn->data); +} +#define SPEC_in2_i2_16u_shl 0 + +static void in2_i2_32u_shl(DisasContext *s, DisasOps *o) +{ + uint64_t i2 = (uint32_t)get_field(s, i2); + o->in2 = tcg_const_i64(i2 << s->insn->data); +} +#define SPEC_in2_i2_32u_shl 0 + +#ifndef CONFIG_USER_ONLY +static void in2_insn(DisasContext *s, DisasOps *o) +{ + o->in2 = tcg_const_i64(s->fields.raw_insn); +} +#define SPEC_in2_insn 0 +#endif + +/* ====================================================================== */ + +/* Find opc within the table of insns. This is formulated as a switch + statement so that (1) we get compile-time notice of cut-paste errors + for duplicated opcodes, and (2) the compiler generates the binary + search tree, rather than us having to post-process the table. */ + +#define C(OPC, NM, FT, FC, I1, I2, P, W, OP, CC) \ + E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, 0, 0) + +#define D(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D) \ + E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, 0) + +#define F(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, FL) \ + E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, 0, FL) + +#define E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, FL) insn_ ## NM, + +enum DisasInsnEnum { +#include "insn-data.def" +}; + +#undef E +#define E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, FL) { \ + .opc = OPC, \ + .flags = FL, \ + .fmt = FMT_##FT, \ + .fac = FAC_##FC, \ + .spec = SPEC_in1_##I1 | SPEC_in2_##I2 | SPEC_prep_##P | SPEC_wout_##W, \ + .name = #NM, \ + .help_in1 = in1_##I1, \ + .help_in2 = in2_##I2, \ + .help_prep = prep_##P, \ + .help_wout = wout_##W, \ + .help_cout = cout_##CC, \ + .help_op = op_##OP, \ + .data = D \ + }, + +/* Allow 0 to be used for NULL in the table below. */ +#define in1_0 NULL +#define in2_0 NULL +#define prep_0 NULL +#define wout_0 NULL +#define cout_0 NULL +#define op_0 NULL + +#define SPEC_in1_0 0 +#define SPEC_in2_0 0 +#define SPEC_prep_0 0 +#define SPEC_wout_0 0 + +/* Give smaller names to the various facilities. 
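+   Each FAC_* alias maps the facility column of insn-data.def to the
+   S390_FEAT_* bit recorded in DisasInsn.fac.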
 */
+#define FAC_Z S390_FEAT_ZARCH
+#define FAC_CASS S390_FEAT_COMPARE_AND_SWAP_AND_STORE
+#define FAC_DFP S390_FEAT_DFP
+#define FAC_DFPR S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* DFP-rounding */
+#define FAC_DO S390_FEAT_STFLE_45 /* distinct-operands */
+#define FAC_EE S390_FEAT_EXECUTE_EXT
+#define FAC_EI S390_FEAT_EXTENDED_IMMEDIATE
+#define FAC_FPE S390_FEAT_FLOATING_POINT_EXT
+#define FAC_FPSSH S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* FPS-sign-handling */
+#define FAC_FPRGR S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* FPR-GR-transfer */
+#define FAC_GIE S390_FEAT_GENERAL_INSTRUCTIONS_EXT
+#define FAC_HFP_MA S390_FEAT_HFP_MADDSUB
+#define FAC_HW S390_FEAT_STFLE_45 /* high-word */
+#define FAC_IEEEE_SIM S390_FEAT_FLOATING_POINT_SUPPPORT_ENH /* IEEE-exception-simulation */
+#define FAC_MIE S390_FEAT_STFLE_49 /* misc-instruction-extensions */
+#define FAC_LAT S390_FEAT_STFLE_49 /* load-and-trap */
+#define FAC_LOC S390_FEAT_STFLE_45 /* load/store on condition 1 */
+#define FAC_LOC2 S390_FEAT_STFLE_53 /* load/store on condition 2 */
+#define FAC_LD S390_FEAT_LONG_DISPLACEMENT
+#define FAC_PC S390_FEAT_STFLE_45 /* population count */
+#define FAC_SCF S390_FEAT_STORE_CLOCK_FAST
+#define FAC_SFLE S390_FEAT_STFLE
+#define FAC_ILA S390_FEAT_STFLE_45 /* interlocked-access-facility 1 */
+#define FAC_MVCOS S390_FEAT_MOVE_WITH_OPTIONAL_SPEC
+#define FAC_LPP S390_FEAT_SET_PROGRAM_PARAMETERS /* load-program-parameter */
+#define FAC_DAT_ENH S390_FEAT_DAT_ENH
+#define FAC_E2 S390_FEAT_EXTENDED_TRANSLATION_2
+#define FAC_EH S390_FEAT_STFLE_49 /* execution-hint */
+#define FAC_PPA S390_FEAT_STFLE_49 /* processor-assist */
+#define FAC_LZRB S390_FEAT_STFLE_53 /* load-and-zero-rightmost-byte */
+#define FAC_ETF3 S390_FEAT_EXTENDED_TRANSLATION_3
+#define FAC_MSA S390_FEAT_MSA /* message-security-assist facility */
+#define FAC_MSA3 S390_FEAT_MSA_EXT_3 /* msa-extension-3 facility */
+#define FAC_MSA4 S390_FEAT_MSA_EXT_4 /* msa-extension-4 facility */
+#define FAC_MSA5 S390_FEAT_MSA_EXT_5 /* msa-extension-5 facility */
+#define FAC_MSA8 S390_FEAT_MSA_EXT_8 /* msa-extension-8 facility */
+#define FAC_ECT S390_FEAT_EXTRACT_CPU_TIME
+#define FAC_PCI S390_FEAT_ZPCI /* z/PCI facility */
+#define FAC_AIS S390_FEAT_ADAPTER_INT_SUPPRESSION
+#define FAC_V S390_FEAT_VECTOR /* vector facility */
+#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 */
+#define FAC_MIE2 S390_FEAT_MISC_INSTRUCTION_EXT2 /* miscellaneous-instruction-extensions facility 2 */
+
+static const DisasInsn insn_info[] = {
+#include "insn-data.def"
+};
+
+#undef E
+#define E(OPC, NM, FT, FC, I1, I2, P, W, OP, CC, D, FL) \
+    case OPC: return &insn_info[insn_ ## NM];
+
+static const DisasInsn *lookup_opc(uint16_t opc)
+{
+    switch (opc) {
+#include "insn-data.def"
+    default:
+        return NULL;
+    }
+}
+
+#undef F
+#undef E
+#undef D
+#undef C
+
+/* Extract a field from the insn.  The INSN should be left-aligned in
+   the uint64_t so that we can more easily utilize the big-bit-endian
+   definitions we extract from the Principles of Operation.  */
+
+static void extract_field(DisasFields *o, const DisasField *f, uint64_t insn)
+{
+    uint32_t r, m;
+
+    if (f->size == 0) {
+        return;
+    }
+
+    /* Zero extract the field from the insn.  */
+    r = (insn << f->beg) >> (64 - f->size);
+
+    /* Sign-extend, or un-swap the field as necessary.  */
+    switch (f->type) {
+    case 0: /* unsigned */
+        break;
+    case 1: /* signed */
+        assert(f->size <= 32);
+        m = 1u << (f->size - 1);
+        r = (r ^ m) - m;
+        break;
+    case 2: /* dl+dh split, signed 20 bit.
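+               The raw field holds DL in its upper 12 bits and DH in its
+               low 8 bits; sign-extend DH and recombine as DH:DL.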
*/ + r = ((int8_t)r << 12) | (r >> 8); + break; + case 3: /* MSB stored in RXB */ + g_assert(f->size == 4); + switch (f->beg) { + case 8: + r |= extract64(insn, 63 - 36, 1) << 4; + break; + case 12: + r |= extract64(insn, 63 - 37, 1) << 4; + break; + case 16: + r |= extract64(insn, 63 - 38, 1) << 4; + break; + case 32: + r |= extract64(insn, 63 - 39, 1) << 4; + break; + default: + g_assert_not_reached(); + } + break; + default: + abort(); + } + + /* + * Validate that the "compressed" encoding we selected above is valid. + * I.e. we haven't made two different original fields overlap. + */ + assert(((o->presentC >> f->indexC) & 1) == 0); + o->presentC |= 1 << f->indexC; + o->presentO |= 1 << f->indexO; + + o->c[f->indexC] = r; +} + +/* Lookup the insn at the current PC, extracting the operands into O and + returning the info struct for the insn. Returns NULL for invalid insn. */ + +static const DisasInsn *extract_insn(CPUS390XState *env, DisasContext *s) +{ + uint64_t insn, pc = s->base.pc_next; + int op, op2, ilen; + const DisasInsn *info; + + if (unlikely(s->ex_value)) { + /* Drop the EX data now, so that it's clear on exception paths. */ + TCGv_i64 zero = tcg_const_i64(0); + tcg_gen_st_i64(zero, cpu_env, offsetof(CPUS390XState, ex_value)); + tcg_temp_free_i64(zero); + + /* Extract the values saved by EXECUTE. */ + insn = s->ex_value & 0xffffffffffff0000ull; + ilen = s->ex_value & 0xf; + op = insn >> 56; + } else { + insn = ld_code2(env, pc); + op = (insn >> 8) & 0xff; + ilen = get_ilen(op); + switch (ilen) { + case 2: + insn = insn << 48; + break; + case 4: + insn = ld_code4(env, pc) << 32; + break; + case 6: + insn = (insn << 48) | (ld_code4(env, pc + 2) << 16); + break; + default: + g_assert_not_reached(); + } + } + s->pc_tmp = s->base.pc_next + ilen; + s->ilen = ilen; + + /* We can't actually determine the insn format until we've looked up + the full insn opcode. Which we can't do without locating the + secondary opcode. Assume by default that OP2 is at bit 40; for + those smaller insns that don't actually have a secondary opcode + this will correctly result in OP2 = 0. */ + switch (op) { + case 0x01: /* E */ + case 0x80: /* S */ + case 0x82: /* S */ + case 0x93: /* S */ + case 0xb2: /* S, RRF, RRE, IE */ + case 0xb3: /* RRE, RRD, RRF */ + case 0xb9: /* RRE, RRF */ + case 0xe5: /* SSE, SIL */ + op2 = (insn << 8) >> 56; + break; + case 0xa5: /* RI */ + case 0xa7: /* RI */ + case 0xc0: /* RIL */ + case 0xc2: /* RIL */ + case 0xc4: /* RIL */ + case 0xc6: /* RIL */ + case 0xc8: /* SSF */ + case 0xcc: /* RIL */ + op2 = (insn << 12) >> 60; + break; + case 0xc5: /* MII */ + case 0xc7: /* SMI */ + case 0xd0 ... 0xdf: /* SS */ + case 0xe1: /* SS */ + case 0xe2: /* SS */ + case 0xe8: /* SS */ + case 0xe9: /* SS */ + case 0xea: /* SS */ + case 0xee ... 0xf3: /* SS */ + case 0xf8 ... 0xfd: /* SS */ + op2 = 0; + break; + default: + op2 = (insn << 40) >> 56; + break; + } + + memset(&s->fields, 0, sizeof(s->fields)); + s->fields.raw_insn = insn; + s->fields.op = op; + s->fields.op2 = op2; + + /* Lookup the instruction. */ + info = lookup_opc(op << 8 | op2); + s->insn = info; + + /* If we found it, extract the operands. 
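+       Each operand field is decoded via the format_info[] entry for the
+       insn's format into s->fields.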
*/ + if (info != NULL) { + DisasFormat fmt = info->fmt; + int i; + + for (i = 0; i < NUM_C_FIELD; ++i) { + extract_field(&s->fields, &format_info[fmt].op[i], insn); + } + } + return info; +} + +static bool is_afp_reg(int reg) +{ + return reg % 2 || reg > 6; +} + +static bool is_fp_pair(int reg) +{ + /* 0,1,4,5,8,9,12,13: to exclude the others, check for single bit */ + return !(reg & 0x2); +} + +static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) +{ + const DisasInsn *insn; + DisasJumpType ret = DISAS_NEXT; + DisasOps o = {}; + bool icount = false; + + /* Search for the insn in the table. */ + insn = extract_insn(env, s); + + /* Emit insn_start now that we know the ILEN. */ + tcg_gen_insn_start(s->base.pc_next, s->cc_op, s->ilen); + + /* Not found means unimplemented/illegal opcode. */ + if (insn == NULL) { + qemu_log_mask(LOG_UNIMP, "unimplemented opcode 0x%02x%02x\n", + s->fields.op, s->fields.op2); + gen_illegal_opcode(s); + ret = DISAS_NORETURN; + goto out; + } + +#ifndef CONFIG_USER_ONLY + if (s->base.tb->flags & FLAG_MASK_PER) { + TCGv_i64 addr = tcg_const_i64(s->base.pc_next); + gen_helper_per_ifetch(cpu_env, addr); + tcg_temp_free_i64(addr); + } +#endif + + /* process flags */ + if (insn->flags) { + /* privileged instruction */ + if ((s->base.tb->flags & FLAG_MASK_PSTATE) && (insn->flags & IF_PRIV)) { + gen_program_exception(s, PGM_PRIVILEGED); + ret = DISAS_NORETURN; + goto out; + } + + /* if AFP is not enabled, instructions and registers are forbidden */ + if (!(s->base.tb->flags & FLAG_MASK_AFP)) { + uint8_t dxc = 0; + + if ((insn->flags & IF_AFP1) && is_afp_reg(get_field(s, r1))) { + dxc = 1; + } + if ((insn->flags & IF_AFP2) && is_afp_reg(get_field(s, r2))) { + dxc = 1; + } + if ((insn->flags & IF_AFP3) && is_afp_reg(get_field(s, r3))) { + dxc = 1; + } + if (insn->flags & IF_BFP) { + dxc = 2; + } + if (insn->flags & IF_DFP) { + dxc = 3; + } + if (insn->flags & IF_VEC) { + dxc = 0xfe; + } + if (dxc) { + gen_data_exception(dxc); + ret = DISAS_NORETURN; + goto out; + } + } + + /* if vector instructions not enabled, executing them is forbidden */ + if (insn->flags & IF_VEC) { + if (!((s->base.tb->flags & FLAG_MASK_VECTOR))) { + gen_data_exception(0xfe); + ret = DISAS_NORETURN; + goto out; + } + } + + /* input/output is the special case for icount mode */ + if (unlikely(insn->flags & IF_IO)) { + icount = tb_cflags(s->base.tb) & CF_USE_ICOUNT; + if (icount) { + gen_io_start(); + } + } + } + + /* Check for insn specification exceptions. */ + if (insn->spec) { + if ((insn->spec & SPEC_r1_even && get_field(s, r1) & 1) || + (insn->spec & SPEC_r2_even && get_field(s, r2) & 1) || + (insn->spec & SPEC_r3_even && get_field(s, r3) & 1) || + (insn->spec & SPEC_r1_f128 && !is_fp_pair(get_field(s, r1))) || + (insn->spec & SPEC_r2_f128 && !is_fp_pair(get_field(s, r2)))) { + gen_program_exception(s, PGM_SPECIFICATION); + ret = DISAS_NORETURN; + goto out; + } + } + + /* Implement the instruction. */ + if (insn->help_in1) { + insn->help_in1(s, &o); + } + if (insn->help_in2) { + insn->help_in2(s, &o); + } + if (insn->help_prep) { + insn->help_prep(s, &o); + } + if (insn->help_op) { + ret = insn->help_op(s, &o); + } + if (ret != DISAS_NORETURN) { + if (insn->help_wout) { + insn->help_wout(s, &o); + } + if (insn->help_cout) { + insn->help_cout(s, &o); + } + } + + /* Free any temporaries created by the helpers. 
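+       Operands flagged g_* alias TCG globals (CPU registers) and must
+       stay live, so those are skipped.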
 */
+    if (o.out && !o.g_out) {
+        tcg_temp_free_i64(o.out);
+    }
+    if (o.out2 && !o.g_out2) {
+        tcg_temp_free_i64(o.out2);
+    }
+    if (o.in1 && !o.g_in1) {
+        tcg_temp_free_i64(o.in1);
+    }
+    if (o.in2 && !o.g_in2) {
+        tcg_temp_free_i64(o.in2);
+    }
+    if (o.addr1) {
+        tcg_temp_free_i64(o.addr1);
+    }
+
+    /* io should be the last instruction in tb when icount is enabled */
+    if (unlikely(icount && ret == DISAS_NEXT)) {
+        ret = DISAS_PC_STALE;
+    }
+
+#ifndef CONFIG_USER_ONLY
+    if (s->base.tb->flags & FLAG_MASK_PER) {
+        /* An exception might be triggered, save PSW if not already done.  */
+        if (ret == DISAS_NEXT || ret == DISAS_PC_STALE) {
+            tcg_gen_movi_i64(psw_addr, s->pc_tmp);
+        }
+
+        /* Call the helper to check for a possible PER exception.  */
+        gen_helper_per_check_exception(cpu_env);
+    }
+#endif
+
+out:
+    /* Advance to the next instruction.  */
+    s->base.pc_next = s->pc_tmp;
+    return ret;
+}
+
+static void s390x_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
+{
+    DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+    /* 31-bit mode */
+    if (!(dc->base.tb->flags & FLAG_MASK_64)) {
+        dc->base.pc_first &= 0x7fffffff;
+        dc->base.pc_next = dc->base.pc_first;
+    }
+
+    dc->cc_op = CC_OP_DYNAMIC;
+    dc->ex_value = dc->base.tb->cs_base;
+    dc->do_debug = dc->base.singlestep_enabled;
+}
+
+static void s390x_tr_tb_start(DisasContextBase *db, CPUState *cs)
+{
+}
+
+static void s390x_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
+{
+}
+
+static bool s390x_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cs,
+                                      const CPUBreakpoint *bp)
+{
+    DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+    /*
+     * Emit an insn_start to accompany the breakpoint exception.
+     * The ILEN value is a dummy, since this does not result in
+     * an s390x exception, but an internal qemu exception which
+     * brings us back to interact with the gdbstub.
+     */
+    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op, 2);
+
+    dc->base.is_jmp = DISAS_PC_STALE;
+    dc->do_debug = true;
+    /* The address covered by the breakpoint must be included in
+       [tb->pc, tb->pc + tb->size) in order for it to be
+       properly cleared -- thus we increment the PC here so that
+       the logic setting tb->size does the right thing. */
+    dc->base.pc_next += 2;
+    return true;
+}
+
+static void s390x_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
+{
+    CPUS390XState *env = cs->env_ptr;
+    DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+    dc->base.is_jmp = translate_one(env, dc);
+    if (dc->base.is_jmp == DISAS_NEXT) {
+        uint64_t page_start;
+
+        page_start = dc->base.pc_first & TARGET_PAGE_MASK;
+        if (dc->base.pc_next - page_start >= TARGET_PAGE_SIZE || dc->ex_value) {
+            dc->base.is_jmp = DISAS_TOO_MANY;
+        }
+    }
+}
+
+static void s390x_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
+{
+    DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+    switch (dc->base.is_jmp) {
+    case DISAS_GOTO_TB:
+    case DISAS_NORETURN:
+        break;
+    case DISAS_TOO_MANY:
+    case DISAS_PC_STALE:
+    case DISAS_PC_STALE_NOCHAIN:
+        update_psw_addr(dc);
+        /* FALLTHRU */
+    case DISAS_PC_UPDATED:
+        /* Next TB starts off with CC_OP_DYNAMIC, so make sure the
+           cc op type is in env */
+        update_cc_op(dc);
+        /* FALLTHRU */
+    case DISAS_PC_CC_UPDATED:
+        /* Exit the TB, either by raising a debug exception or by return.
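+           With PER enabled or DISAS_PC_STALE_NOCHAIN we must use
+           exit_tb(0) so the TB is not chained and pending interrupts
+           are re-examined before the next TB.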
 */
+        if (dc->do_debug) {
+            gen_exception(EXCP_DEBUG);
+        } else if ((dc->base.tb->flags & FLAG_MASK_PER) ||
+                   dc->base.is_jmp == DISAS_PC_STALE_NOCHAIN) {
+            tcg_gen_exit_tb(NULL, 0);
+        } else {
+            tcg_gen_lookup_and_goto_ptr();
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void s390x_tr_disas_log(const DisasContextBase *dcbase, CPUState *cs)
+{
+    DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+    if (unlikely(dc->ex_value)) {
+        /* ??? Unfortunately log_target_disas can't use host memory.  */
+        qemu_log("IN: EXECUTE %016" PRIx64 "\n", dc->ex_value);
+    } else {
+        qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
+        log_target_disas(cs, dc->base.pc_first, dc->base.tb->size);
+    }
+}
+
+static const TranslatorOps s390x_tr_ops = {
+    .init_disas_context = s390x_tr_init_disas_context,
+    .tb_start = s390x_tr_tb_start,
+    .insn_start = s390x_tr_insn_start,
+    .breakpoint_check = s390x_tr_breakpoint_check,
+    .translate_insn = s390x_tr_translate_insn,
+    .tb_stop = s390x_tr_tb_stop,
+    .disas_log = s390x_tr_disas_log,
+};
+
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+{
+    DisasContext dc;
+
+    translator_loop(&s390x_tr_ops, &dc.base, cs, tb, max_insns);
+}
+
+void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb,
+                          target_ulong *data)
+{
+    int cc_op = data[1];
+
+    env->psw.addr = data[0];
+
+    /* Update the CC opcode if it is not already up-to-date.  */
+    if ((cc_op != CC_OP_DYNAMIC) && (cc_op != CC_OP_STATIC)) {
+        env->cc_op = cc_op;
+    }
+
+    /* Record ILEN.  */
+    env->int_pgm_ilen = data[2];
+}
diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc
new file mode 100644
index 0000000000..0afa46e463
--- /dev/null
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -0,0 +1,3109 @@
+/*
+ * QEMU TCG support -- s390x vector instruction translation functions
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ *   David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * For most instructions that use the same element size for reads and
+ * writes, we can use real gvec vector expansion, which potentially uses
+ * real host vector instructions. As they only work up to 64 bit elements,
+ * 128 bit elements (the whole vector is a single element) have to be
+ * handled differently. Operations that are too complicated to encode via
+ * TCG ops are handled via gvec ool (out-of-line) handlers.
+ *
+ * As soon as instructions use different element sizes for reads and writes
+ * or access elements "out of their element scope" we expand them manually
+ * in fancy loops, as gvec expansion does not deal with actual element
+ * numbers and also does not support access to other elements.
+ *
+ * 128 bit elements:
+ *  As we only have i32/i64, such elements have to be loaded into two
+ *  i64 values and can then be processed e.g. by tcg_gen_add2_i64.
+ *
+ * Sizes:
+ *  On s390x, the operand size (oprsz) and the maximum size (maxsz) are
+ *  always 16 (128 bit). What gvec code calls "vece", s390x calls "es",
+ *  a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only
+ *  the 128 bit element size has to be treated in a special way (MO_64 + 1).
+ *  We will use ES_* instead of MO_* for this reason in this file.
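+ *  E.g. a vector holds 16 ES_8 elements, two ES_64 elements, or a
+ *  single ES_128 element.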
+ *
+ * CC handling:
+ *  As gvec ool-helpers can currently not return values (besides via
+ *  pointers like vectors or cpu_env), whenever we have to set the CC and
+ *  can't conclude the value from the result vector, we will directly
+ *  set it in "env->cc_op" and mark it as static via set_cc_static().
+ *  Whenever this is done, the helper writes globals (cc_op).
+ */
+
+#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es))
+#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es))
+#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE)
+
+#define ES_8 MO_8
+#define ES_16 MO_16
+#define ES_32 MO_32
+#define ES_64 MO_64
+#define ES_128 4
+
+/* Floating-Point Format */
+#define FPF_SHORT 2
+#define FPF_LONG 3
+#define FPF_EXT 4
+
+static inline bool valid_vec_element(uint8_t enr, MemOp es)
+{
+    return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
+}
+
+static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
+                                 MemOp memop)
+{
+    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
+
+    switch (memop) {
+    case ES_8:
+        tcg_gen_ld8u_i64(dst, cpu_env, offs);
+        break;
+    case ES_16:
+        tcg_gen_ld16u_i64(dst, cpu_env, offs);
+        break;
+    case ES_32:
+        tcg_gen_ld32u_i64(dst, cpu_env, offs);
+        break;
+    case ES_8 | MO_SIGN:
+        tcg_gen_ld8s_i64(dst, cpu_env, offs);
+        break;
+    case ES_16 | MO_SIGN:
+        tcg_gen_ld16s_i64(dst, cpu_env, offs);
+        break;
+    case ES_32 | MO_SIGN:
+        tcg_gen_ld32s_i64(dst, cpu_env, offs);
+        break;
+    case ES_64:
+    case ES_64 | MO_SIGN:
+        tcg_gen_ld_i64(dst, cpu_env, offs);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void read_vec_element_i32(TCGv_i32 dst, uint8_t reg, uint8_t enr,
+                                 MemOp memop)
+{
+    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
+
+    switch (memop) {
+    case ES_8:
+        tcg_gen_ld8u_i32(dst, cpu_env, offs);
+        break;
+    case ES_16:
+        tcg_gen_ld16u_i32(dst, cpu_env, offs);
+        break;
+    case ES_8 | MO_SIGN:
+        tcg_gen_ld8s_i32(dst, cpu_env, offs);
+        break;
+    case ES_16 | MO_SIGN:
+        tcg_gen_ld16s_i32(dst, cpu_env, offs);
+        break;
+    case ES_32:
+    case ES_32 | MO_SIGN:
+        tcg_gen_ld_i32(dst, cpu_env, offs);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
+                                  MemOp memop)
+{
+    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
+
+    switch (memop) {
+    case ES_8:
+        tcg_gen_st8_i64(src, cpu_env, offs);
+        break;
+    case ES_16:
+        tcg_gen_st16_i64(src, cpu_env, offs);
+        break;
+    case ES_32:
+        tcg_gen_st32_i64(src, cpu_env, offs);
+        break;
+    case ES_64:
+        tcg_gen_st_i64(src, cpu_env, offs);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void write_vec_element_i32(TCGv_i32 src, int reg, uint8_t enr,
+                                  MemOp memop)
+{
+    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
+
+    switch (memop) {
+    case ES_8:
+        tcg_gen_st8_i32(src, cpu_env, offs);
+        break;
+    case ES_16:
+        tcg_gen_st16_i32(src, cpu_env, offs);
+        break;
+    case ES_32:
+        tcg_gen_st_i32(src, cpu_env, offs);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
+                                    uint8_t es)
+{
+    TCGv_i64 tmp = tcg_temp_new_i64();
+
+    /* mask off invalid parts from the element nr */
+    tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1);
+
+    /* convert it to an element offset relative to cpu_env (vec_reg_offset()) */
+    tcg_gen_shli_i64(tmp, tmp, es);
+#ifndef HOST_WORDS_BIGENDIAN
+    tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
+#endif
+    tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));
+
+    /*
generate the final ptr by adding cpu_env */ + tcg_gen_trunc_i64_ptr(ptr, tmp); + tcg_gen_add_ptr(ptr, ptr, cpu_env); + + tcg_temp_free_i64(tmp); +} + +#define gen_gvec_2(v1, v2, gen) \ + tcg_gen_gvec_2(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + 16, 16, gen) +#define gen_gvec_2s(v1, v2, c, gen) \ + tcg_gen_gvec_2s(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + 16, 16, c, gen) +#define gen_gvec_2_ool(v1, v2, data, fn) \ + tcg_gen_gvec_2_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + 16, 16, data, fn) +#define gen_gvec_2i_ool(v1, v2, c, data, fn) \ + tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + c, 16, 16, data, fn) +#define gen_gvec_2_ptr(v1, v2, ptr, data, fn) \ + tcg_gen_gvec_2_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + ptr, 16, 16, data, fn) +#define gen_gvec_3(v1, v2, v3, gen) \ + tcg_gen_gvec_3(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), 16, 16, gen) +#define gen_gvec_3_ool(v1, v2, v3, data, fn) \ + tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), 16, 16, data, fn) +#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) \ + tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), ptr, 16, 16, data, fn) +#define gen_gvec_3i(v1, v2, v3, c, gen) \ + tcg_gen_gvec_3i(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), 16, 16, c, gen) +#define gen_gvec_4(v1, v2, v3, v4, gen) \ + tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), \ + 16, 16, gen) +#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn) \ + tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), \ + 16, 16, data, fn) +#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \ + tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), \ + ptr, 16, 16, data, fn) +#define gen_gvec_dup_i64(es, v1, c) \ + tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c) +#define gen_gvec_mov(v1, v2) \ + tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \ + 16) +#define gen_gvec_dup_imm(es, v1, c) \ + tcg_gen_gvec_dup_imm(es, vec_full_reg_offset(v1), 16, 16, c); +#define gen_gvec_fn_2(fn, es, v1, v2) \ + tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + 16, 16) +#define gen_gvec_fn_2i(fn, es, v1, v2, c) \ + tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + c, 16, 16) +#define gen_gvec_fn_2s(fn, es, v1, v2, s) \ + tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + s, 16, 16) +#define gen_gvec_fn_3(fn, es, v1, v2, v3) \ + tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), 16, 16) +#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \ + tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16) + +/* + * Helper to carry out a 128 bit vector computation using 2 i64 values per + * vector. 
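+ * The callback receives both i64 halves of each input and produces both
+ * halves of the result; e.g. passing tcg_gen_add2_i64 as fn (as op_va
+ * below does for ES_128) yields a full 128 bit addition.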
+ */ +typedef void (*gen_gvec128_3_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); +static void gen_gvec128_3_i64(gen_gvec128_3_i64_fn fn, uint8_t d, uint8_t a, + uint8_t b) +{ + TCGv_i64 dh = tcg_temp_new_i64(); + TCGv_i64 dl = tcg_temp_new_i64(); + TCGv_i64 ah = tcg_temp_new_i64(); + TCGv_i64 al = tcg_temp_new_i64(); + TCGv_i64 bh = tcg_temp_new_i64(); + TCGv_i64 bl = tcg_temp_new_i64(); + + read_vec_element_i64(ah, a, 0, ES_64); + read_vec_element_i64(al, a, 1, ES_64); + read_vec_element_i64(bh, b, 0, ES_64); + read_vec_element_i64(bl, b, 1, ES_64); + fn(dl, dh, al, ah, bl, bh); + write_vec_element_i64(dh, d, 0, ES_64); + write_vec_element_i64(dl, d, 1, ES_64); + + tcg_temp_free_i64(dh); + tcg_temp_free_i64(dl); + tcg_temp_free_i64(ah); + tcg_temp_free_i64(al); + tcg_temp_free_i64(bh); + tcg_temp_free_i64(bl); +} + +typedef void (*gen_gvec128_4_i64_fn)(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh, + TCGv_i64 cl, TCGv_i64 ch); +static void gen_gvec128_4_i64(gen_gvec128_4_i64_fn fn, uint8_t d, uint8_t a, + uint8_t b, uint8_t c) +{ + TCGv_i64 dh = tcg_temp_new_i64(); + TCGv_i64 dl = tcg_temp_new_i64(); + TCGv_i64 ah = tcg_temp_new_i64(); + TCGv_i64 al = tcg_temp_new_i64(); + TCGv_i64 bh = tcg_temp_new_i64(); + TCGv_i64 bl = tcg_temp_new_i64(); + TCGv_i64 ch = tcg_temp_new_i64(); + TCGv_i64 cl = tcg_temp_new_i64(); + + read_vec_element_i64(ah, a, 0, ES_64); + read_vec_element_i64(al, a, 1, ES_64); + read_vec_element_i64(bh, b, 0, ES_64); + read_vec_element_i64(bl, b, 1, ES_64); + read_vec_element_i64(ch, c, 0, ES_64); + read_vec_element_i64(cl, c, 1, ES_64); + fn(dl, dh, al, ah, bl, bh, cl, ch); + write_vec_element_i64(dh, d, 0, ES_64); + write_vec_element_i64(dl, d, 1, ES_64); + + tcg_temp_free_i64(dh); + tcg_temp_free_i64(dl); + tcg_temp_free_i64(ah); + tcg_temp_free_i64(al); + tcg_temp_free_i64(bh); + tcg_temp_free_i64(bl); + tcg_temp_free_i64(ch); + tcg_temp_free_i64(cl); +} + +static void gen_addi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, + uint64_t b) +{ + TCGv_i64 bl = tcg_const_i64(b); + TCGv_i64 bh = tcg_const_i64(0); + + tcg_gen_add2_i64(dl, dh, al, ah, bl, bh); + tcg_temp_free_i64(bl); + tcg_temp_free_i64(bh); +} + +static DisasJumpType op_vbperm(DisasContext *s, DisasOps *o) +{ + gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), get_field(s, v3), 0, + gen_helper_gvec_vbperm); + + return DISAS_NEXT; +} + +static DisasJumpType op_vge(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s, v2), enr, es); + tcg_gen_add_i64(o->addr1, o->addr1, tmp); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0); + + tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + write_vec_element_i64(tmp, get_field(s, v1), enr, es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static uint64_t generate_byte_mask(uint8_t mask) +{ + uint64_t r = 0; + int i; + + for (i = 0; i < 8; i++) { + if ((mask >> i) & 1) { + r |= 0xffull << (i * 8); + } + } + return r; +} + +static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o) +{ + const uint16_t i2 = get_field(s, i2); + + if (i2 == (i2 & 0xff) * 0x0101) { + /* + * Masks for both 64 bit elements of the vector are the same. + * Trust tcg to produce a good constant loading. 
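+ * For example, i2 = 0x8181 satisfies this check (0x81 * 0x0101 == 0x8181)
+ * and both doublewords become generate_byte_mask(0x81), i.e. the
+ * constant 0xff000000000000ff.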
+ */ + gen_gvec_dup_imm(ES_64, get_field(s, v1), + generate_byte_mask(i2 & 0xff)); + } else { + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8)); + write_vec_element_i64(t, get_field(s, v1), 0, ES_64); + tcg_gen_movi_i64(t, generate_byte_mask(i2)); + write_vec_element_i64(t, get_field(s, v1), 1, ES_64); + tcg_temp_free_i64(t); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vgm(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t bits = NUM_VEC_ELEMENT_BITS(es); + const uint8_t i2 = get_field(s, i2) & (bits - 1); + const uint8_t i3 = get_field(s, i3) & (bits - 1); + uint64_t mask = 0; + int i; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* generate the mask - take care of wrapping */ + for (i = i2; ; i = (i + 1) % bits) { + mask |= 1ull << (bits - i - 1); + if (i == i3) { + break; + } + } + + gen_gvec_dup_imm(es, get_field(s, v1), mask); + return DISAS_NEXT; +} + +static DisasJumpType op_vl(DisasContext *s, DisasOps *o) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ); + write_vec_element_i64(t0, get_field(s, v1), 0, ES_64); + write_vec_element_i64(t1, get_field(s, v1), 1, ES_64); + tcg_temp_free(t0); + tcg_temp_free(t1); + return DISAS_NEXT; +} + +static DisasJumpType op_vlr(DisasContext *s, DisasOps *o) +{ + gen_gvec_mov(get_field(s, v1), get_field(s, v2)); + return DISAS_NEXT; +} + +static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + TCGv_i64 tmp; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + gen_gvec_dup_i64(es, get_field(s, v1), tmp); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vle(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + write_vec_element_i64(tmp, get_field(s, v1), enr, es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vlei(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_const_i64((int16_t)get_field(s, i2)); + write_vec_element_i64(tmp, get_field(s, v1), enr, es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + TCGv_ptr ptr; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* fast path if we don't need the register content */ + if (!get_field(s, b2)) { + uint8_t enr = get_field(s, d2) & (NUM_VEC_ELEMENTS(es) - 1); + + read_vec_element_i64(o->out, get_field(s, v3), enr, es); + return DISAS_NEXT; + } + + ptr = tcg_temp_new_ptr(); + get_vec_element_ptr_i64(ptr, get_field(s, v3), o->addr1, es); + switch (es) { + case ES_8: + 
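/* the selected element is zero-extended into the 64 bit destination */ +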
tcg_gen_ld8u_i64(o->out, ptr, 0); + break; + case ES_16: + tcg_gen_ld16u_i64(o->out, ptr, 0); + break; + case ES_32: + tcg_gen_ld32u_i64(o->out, ptr, 0); + break; + case ES_64: + tcg_gen_ld_i64(o->out, ptr, 0); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_ptr(ptr); + + return DISAS_NEXT; +} + +static DisasJumpType op_vllez(DisasContext *s, DisasOps *o) +{ + uint8_t es = get_field(s, m3); + uint8_t enr; + TCGv_i64 t; + + switch (es) { + /* rightmost sub-element of leftmost doubleword */ + case ES_8: + enr = 7; + break; + case ES_16: + enr = 3; + break; + case ES_32: + enr = 1; + break; + case ES_64: + enr = 0; + break; + /* leftmost sub-element of leftmost doubleword */ + case 6: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + es = ES_32; + enr = 0; + break; + } + /* fallthrough */ + default: + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es); + gen_gvec_dup_imm(es, get_field(s, v1), 0); + write_vec_element_i64(t, get_field(s, v1), enr, es); + tcg_temp_free_i64(t); + return DISAS_NEXT; +} + +static DisasJumpType op_vlm(DisasContext *s, DisasOps *o) +{ + const uint8_t v3 = get_field(s, v3); + uint8_t v1 = get_field(s, v1); + TCGv_i64 t0, t1; + + if (v3 < v1 || (v3 - v1 + 1) > 16) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* + * Check for possible access exceptions by trying to load the last + * element. The loop below then loads and checks the remaining elements + * in order, starting with the first. + */ + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8); + tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEQ); + + for (;; v1++) { + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ); + write_vec_element_i64(t1, v1, 0, ES_64); + if (v1 == v3) { + break; + } + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ); + write_vec_element_i64(t1, v1, 1, ES_64); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + } + + /* Store the last element, loaded first */ + write_vec_element_i64(t0, v1, 1, ES_64); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + return DISAS_NEXT; +} + +static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o) +{ + const int64_t block_size = (1ull << (get_field(s, m3) + 6)); + const int v1_offs = vec_full_reg_offset(get_field(s, v1)); + TCGv_ptr a0; + TCGv_i64 bytes; + + if (get_field(s, m3) > 6) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + bytes = tcg_temp_new_i64(); + a0 = tcg_temp_new_ptr(); + /* calculate the number of bytes until the next block boundary */ + tcg_gen_ori_i64(bytes, o->addr1, -block_size); + tcg_gen_neg_i64(bytes, bytes); + + tcg_gen_addi_ptr(a0, cpu_env, v1_offs); + gen_helper_vll(cpu_env, a0, o->addr1, bytes); + tcg_temp_free_i64(bytes); + tcg_temp_free_ptr(a0); + return DISAS_NEXT; +} + +static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + TCGv_ptr ptr; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* fast path if we don't need the register content */ + if (!get_field(s, b2)) { + uint8_t enr = get_field(s, d2) & (NUM_VEC_ELEMENTS(es) - 1); + + write_vec_element_i64(o->in2, get_field(s, v1), enr, es); + return DISAS_NEXT; + } + + ptr = tcg_temp_new_ptr(); + get_vec_element_ptr_i64(ptr, get_field(s, v1), o->addr1, es); + switch (es) { + case ES_8: + 
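/* the store through ptr modifies only the selected element */ +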
tcg_gen_st8_i64(o->in2, ptr, 0); + break; + case ES_16: + tcg_gen_st16_i64(o->in2, ptr, 0); + break; + case ES_32: + tcg_gen_st32_i64(o->in2, ptr, 0); + break; + case ES_64: + tcg_gen_st_i64(o->in2, ptr, 0); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_ptr(ptr); + + return DISAS_NEXT; +} + +static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o) +{ + write_vec_element_i64(o->in1, get_field(s, v1), 0, ES_64); + write_vec_element_i64(o->in2, get_field(s, v1), 1, ES_64); + return DISAS_NEXT; +} + +static DisasJumpType op_vll(DisasContext *s, DisasOps *o) +{ + const int v1_offs = vec_full_reg_offset(get_field(s, v1)); + TCGv_ptr a0 = tcg_temp_new_ptr(); + + /* convert highest index into an actual length */ + tcg_gen_addi_i64(o->in2, o->in2, 1); + tcg_gen_addi_ptr(a0, cpu_env, v1_offs); + gen_helper_vll(cpu_env, a0, o->addr1, o->in2); + tcg_temp_free_ptr(a0); + return DISAS_NEXT; +} + +static DisasJumpType op_vmr(DisasContext *s, DisasOps *o) +{ + const uint8_t v1 = get_field(s, v1); + const uint8_t v2 = get_field(s, v2); + const uint8_t v3 = get_field(s, v3); + const uint8_t es = get_field(s, m4); + int dst_idx, src_idx; + TCGv_i64 tmp; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + if (s->fields.op2 == 0x61) { + /* iterate backwards to avoid overwriting data we might need later */ + for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) { + src_idx = dst_idx / 2; + if (dst_idx % 2 == 0) { + read_vec_element_i64(tmp, v2, src_idx, es); + } else { + read_vec_element_i64(tmp, v3, src_idx, es); + } + write_vec_element_i64(tmp, v1, dst_idx, es); + } + } else { + /* iterate forward to avoid overwriting data we might need later */ + for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) { + src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2; + if (dst_idx % 2 == 0) { + read_vec_element_i64(tmp, v2, src_idx, es); + } else { + read_vec_element_i64(tmp, v3, src_idx, es); + } + write_vec_element_i64(tmp, v1, dst_idx, es); + } + } + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vpk(DisasContext *s, DisasOps *o) +{ + const uint8_t v1 = get_field(s, v1); + const uint8_t v2 = get_field(s, v2); + const uint8_t v3 = get_field(s, v3); + const uint8_t es = get_field(s, m4); + static gen_helper_gvec_3 * const vpk[3] = { + gen_helper_gvec_vpk16, + gen_helper_gvec_vpk32, + gen_helper_gvec_vpk64, + }; + static gen_helper_gvec_3 * const vpks[3] = { + gen_helper_gvec_vpks16, + gen_helper_gvec_vpks32, + gen_helper_gvec_vpks64, + }; + static gen_helper_gvec_3_ptr * const vpks_cc[3] = { + gen_helper_gvec_vpks_cc16, + gen_helper_gvec_vpks_cc32, + gen_helper_gvec_vpks_cc64, + }; + static gen_helper_gvec_3 * const vpkls[3] = { + gen_helper_gvec_vpkls16, + gen_helper_gvec_vpkls32, + gen_helper_gvec_vpkls64, + }; + static gen_helper_gvec_3_ptr * const vpkls_cc[3] = { + gen_helper_gvec_vpkls_cc16, + gen_helper_gvec_vpkls_cc32, + gen_helper_gvec_vpkls_cc64, + }; + + if (es == ES_8 || es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (s->fields.op2) { + case 0x97: + if (get_field(s, m5) & 0x1) { + gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]); + set_cc_static(s); + } else { + gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]); + } + break; + case 0x95: + if (get_field(s, m5) & 0x1) { + gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]); + set_cc_static(s); + } else { + gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]); + } + 
break; + case 0x94: + /* If sources and destination don't overlap -> fast path */ + if (v1 != v2 && v1 != v3) { + const uint8_t src_es = get_field(s, m4); + const uint8_t dst_es = src_es - 1; + TCGv_i64 tmp = tcg_temp_new_i64(); + int dst_idx, src_idx; + + for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) { + src_idx = dst_idx; + if (src_idx < NUM_VEC_ELEMENTS(src_es)) { + read_vec_element_i64(tmp, v2, src_idx, src_es); + } else { + src_idx -= NUM_VEC_ELEMENTS(src_es); + read_vec_element_i64(tmp, v3, src_idx, src_es); + } + write_vec_element_i64(tmp, v1, dst_idx, dst_es); + } + tcg_temp_free_i64(tmp); + } else { + gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]); + } + break; + default: + g_assert_not_reached(); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vperm(DisasContext *s, DisasOps *o) +{ + gen_gvec_4_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), + 0, gen_helper_gvec_vperm); + return DISAS_NEXT; +} + +static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o) +{ + const uint8_t i2 = extract32(get_field(s, m4), 2, 1); + const uint8_t i3 = extract32(get_field(s, m4), 0, 1); + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + read_vec_element_i64(t0, get_field(s, v2), i2, ES_64); + read_vec_element_i64(t1, get_field(s, v3), i3, ES_64); + write_vec_element_i64(t0, get_field(s, v1), 0, ES_64); + write_vec_element_i64(t1, get_field(s, v1), 1, ES_64); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + return DISAS_NEXT; +} + +static DisasJumpType op_vrep(DisasContext *s, DisasOps *o) +{ + const uint8_t enr = get_field(s, i2); + const uint8_t es = get_field(s, m4); + + if (es > ES_64 || !valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s, v1)), + vec_reg_offset(get_field(s, v3), enr, es), + 16, 16); + return DISAS_NEXT; +} + +static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o) +{ + const int64_t data = (int16_t)get_field(s, i2); + const uint8_t es = get_field(s, m3); + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_dup_imm(es, get_field(s, v1), data); + return DISAS_NEXT; +} + +static DisasJumpType op_vsce(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s, v2), enr, es); + tcg_gen_add_i64(o->addr1, o->addr1, tmp); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0); + + read_vec_element_i64(tmp, get_field(s, v1), enr, es); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vsel(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_4(bitsel, ES_8, get_field(s, v1), + get_field(s, v4), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vseg(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + int idx1, idx2; + TCGv_i64 tmp; + + switch (es) { + case ES_8: + idx1 = 7; + idx2 = 15; + break; + case ES_16: + idx1 = 3; + idx2 = 7; + break; + case ES_32: + idx1 = 1; + idx2 = 3; + break; + default: + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s, 
v2), idx1, es | MO_SIGN); + write_vec_element_i64(tmp, get_field(s, v1), 0, ES_64); + read_vec_element_i64(tmp, get_field(s, v2), idx2, es | MO_SIGN); + write_vec_element_i64(tmp, get_field(s, v1), 1, ES_64); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vst(DisasContext *s, DisasOps *o) +{ + TCGv_i64 tmp = tcg_const_i64(16); + + /* Probe write access before actually modifying memory */ + gen_helper_probe_write_access(cpu_env, o->addr1, tmp); + + read_vec_element_i64(tmp, get_field(s, v1), 0, ES_64); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + read_vec_element_i64(tmp, get_field(s, v1), 1, ES_64); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vste(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s, v1), enr, es); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vstm(DisasContext *s, DisasOps *o) +{ + const uint8_t v3 = get_field(s, v3); + uint8_t v1 = get_field(s, v1); + TCGv_i64 tmp; + + if (v3 < v1 || (v3 - v1 + 1) > 16) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* Probe write access before actually modifying memory */ + tmp = tcg_const_i64((v3 - v1 + 1) * 16); + gen_helper_probe_write_access(cpu_env, o->addr1, tmp); + + for (;; v1++) { + read_vec_element_i64(tmp, v1, 0, ES_64); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + read_vec_element_i64(tmp, v1, 1, ES_64); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ); + if (v1 == v3) { + break; + } + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + } + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vstl(DisasContext *s, DisasOps *o) +{ + const int v1_offs = vec_full_reg_offset(get_field(s, v1)); + TCGv_ptr a0 = tcg_temp_new_ptr(); + + /* convert highest index into an actual length */ + tcg_gen_addi_i64(o->in2, o->in2, 1); + tcg_gen_addi_ptr(a0, cpu_env, v1_offs); + gen_helper_vstl(cpu_env, a0, o->addr1, o->in2); + tcg_temp_free_ptr(a0); + return DISAS_NEXT; +} + +static DisasJumpType op_vup(DisasContext *s, DisasOps *o) +{ + const bool logical = s->fields.op2 == 0xd4 || s->fields.op2 == 0xd5; + const uint8_t v1 = get_field(s, v1); + const uint8_t v2 = get_field(s, v2); + const uint8_t src_es = get_field(s, m3); + const uint8_t dst_es = src_es + 1; + int dst_idx, src_idx; + TCGv_i64 tmp; + + if (src_es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + if (s->fields.op2 == 0xd7 || s->fields.op2 == 0xd5) { + /* iterate backwards to avoid overwriting data we might need later */ + for (dst_idx = NUM_VEC_ELEMENTS(dst_es) - 1; dst_idx >= 0; dst_idx--) { + src_idx = dst_idx; + read_vec_element_i64(tmp, v2, src_idx, + src_es | (logical ? 
0 : MO_SIGN)); + write_vec_element_i64(tmp, v1, dst_idx, dst_es); + } + + } else { + /* iterate forward to avoid overwriting data we might need later */ + for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) { + src_idx = dst_idx + NUM_VEC_ELEMENTS(src_es) / 2; + read_vec_element_i64(tmp, v2, src_idx, + src_es | (logical ? 0 : MO_SIGN)); + write_vec_element_i64(tmp, v1, dst_idx, dst_es); + } + } + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_va(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + + if (es > ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } else if (es == ES_128) { + gen_gvec128_3_i64(tcg_gen_add2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; + } + gen_gvec_fn_3(add, es, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static void gen_acc(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, uint8_t es) +{ + const uint8_t msb_bit_nr = NUM_VEC_ELEMENT_BITS(es) - 1; + TCGv_i64 msb_mask = tcg_const_i64(dup_const(es, 1ull << msb_bit_nr)); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + TCGv_i64 t3 = tcg_temp_new_i64(); + + /* Calculate the carry into the MSB, ignoring the old MSBs */ + tcg_gen_andc_i64(t1, a, msb_mask); + tcg_gen_andc_i64(t2, b, msb_mask); + tcg_gen_add_i64(t1, t1, t2); + /* Calculate the MSB without any carry into it */ + tcg_gen_xor_i64(t3, a, b); + /* Calculate the carry out of the MSB in the MSB bit position */ + tcg_gen_and_i64(d, a, b); + tcg_gen_and_i64(t1, t1, t3); + tcg_gen_or_i64(d, d, t1); + /* Isolate and shift the carry into position */ + tcg_gen_and_i64(d, d, msb_mask); + tcg_gen_shri_i64(d, d, msb_bit_nr); + + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + tcg_temp_free_i64(t3); +} + +static void gen_acc8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + gen_acc(d, a, b, ES_8); +} + +static void gen_acc16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + gen_acc(d, a, b, ES_16); +} + +static void gen_acc_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_add_i32(t, a, b); + tcg_gen_setcond_i32(TCG_COND_LTU, d, t, b); + tcg_temp_free_i32(t); +} + +static void gen_acc_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_add_i64(t, a, b); + tcg_gen_setcond_i64(TCG_COND_LTU, d, t, b); + tcg_temp_free_i64(t); +} + +static void gen_acc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) +{ + TCGv_i64 th = tcg_temp_new_i64(); + TCGv_i64 tl = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_const_i64(0); + + tcg_gen_add2_i64(tl, th, al, zero, bl, zero); + tcg_gen_add2_i64(tl, th, th, zero, ah, zero); + tcg_gen_add2_i64(tl, dl, tl, th, bh, zero); + tcg_gen_mov_i64(dh, zero); + + tcg_temp_free_i64(th); + tcg_temp_free_i64(tl); + tcg_temp_free_i64(zero); +} + +static DisasJumpType op_vacc(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + static const GVecGen3 g[4] = { + { .fni8 = gen_acc8_i64, }, + { .fni8 = gen_acc16_i64, }, + { .fni4 = gen_acc_i32, }, + { .fni8 = gen_acc_i64, }, + }; + + if (es > ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } else if (es == ES_128) { + gen_gvec128_3_i64(gen_acc2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; + } + gen_gvec_3(get_field(s, v1), get_field(s, v2), + get_field(s, v3), &g[es]); + return DISAS_NEXT; +} + +static void gen_ac2_i64(TCGv_i64 dl, 
TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, + TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch) +{ + TCGv_i64 tl = tcg_temp_new_i64(); + TCGv_i64 th = tcg_const_i64(0); + + /* extract the carry only */ + tcg_gen_extract_i64(tl, cl, 0, 1); + tcg_gen_add2_i64(dl, dh, al, ah, bl, bh); + tcg_gen_add2_i64(dl, dh, dl, dh, tl, th); + + tcg_temp_free_i64(tl); + tcg_temp_free_i64(th); +} + +static DisasJumpType op_vac(DisasContext *s, DisasOps *o) +{ + if (get_field(s, m5) != ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec128_4_i64(gen_ac2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3), + get_field(s, v4)); + return DISAS_NEXT; +} + +static void gen_accc2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, + TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch) +{ + TCGv_i64 tl = tcg_temp_new_i64(); + TCGv_i64 th = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_const_i64(0); + + tcg_gen_andi_i64(tl, cl, 1); + tcg_gen_add2_i64(tl, th, tl, zero, al, zero); + tcg_gen_add2_i64(tl, th, tl, th, bl, zero); + tcg_gen_add2_i64(tl, th, th, zero, ah, zero); + tcg_gen_add2_i64(tl, dl, tl, th, bh, zero); + tcg_gen_mov_i64(dh, zero); + + tcg_temp_free_i64(tl); + tcg_temp_free_i64(th); + tcg_temp_free_i64(zero); +} + +static DisasJumpType op_vaccc(DisasContext *s, DisasOps *o) +{ + if (get_field(s, m5) != ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec128_4_i64(gen_accc2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3), + get_field(s, v4)); + return DISAS_NEXT; +} + +static DisasJumpType op_vn(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(and, ES_8, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vnc(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(andc, ES_8, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; +} + +static void gen_avg_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_ext_i32_i64(t0, a); + tcg_gen_ext_i32_i64(t1, b); + tcg_gen_add_i64(t0, t0, t1); + tcg_gen_addi_i64(t0, t0, 1); + tcg_gen_shri_i64(t0, t0, 1); + tcg_gen_extrl_i64_i32(d, t0); + + tcg_temp_free(t0); + tcg_temp_free(t1); +} + +static void gen_avg_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl) +{ + TCGv_i64 dh = tcg_temp_new_i64(); + TCGv_i64 ah = tcg_temp_new_i64(); + TCGv_i64 bh = tcg_temp_new_i64(); + + /* extending the sign by one bit is sufficient */ + tcg_gen_extract_i64(ah, al, 63, 1); + tcg_gen_extract_i64(bh, bl, 63, 1); + tcg_gen_add2_i64(dl, dh, al, ah, bl, bh); + gen_addi2_i64(dl, dh, dl, dh, 1); + tcg_gen_extract2_i64(dl, dl, dh, 1); + + tcg_temp_free_i64(dh); + tcg_temp_free_i64(ah); + tcg_temp_free_i64(bh); +} + +static DisasJumpType op_vavg(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + static const GVecGen3 g[4] = { + { .fno = gen_helper_gvec_vavg8, }, + { .fno = gen_helper_gvec_vavg16, }, + { .fni4 = gen_avg_i32, }, + { .fni8 = gen_avg_i64, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + gen_gvec_3(get_field(s, v1), get_field(s, v2), + get_field(s, v3), &g[es]); + return DISAS_NEXT; +} + +static void gen_avgl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(t0, a); + tcg_gen_extu_i32_i64(t1, b); + tcg_gen_add_i64(t0, t0, t1); + tcg_gen_addi_i64(t0, t0, 1); + 
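/* the 33 bit intermediate (a + b + 1) cannot overflow in 64 bits */ +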
tcg_gen_shri_i64(t0, t0, 1); + tcg_gen_extrl_i64_i32(d, t0); + + tcg_temp_free(t0); + tcg_temp_free(t1); +} + +static void gen_avgl_i64(TCGv_i64 dl, TCGv_i64 al, TCGv_i64 bl) +{ + TCGv_i64 dh = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_const_i64(0); + + tcg_gen_add2_i64(dl, dh, al, zero, bl, zero); + gen_addi2_i64(dl, dh, dl, dh, 1); + tcg_gen_extract2_i64(dl, dl, dh, 1); + + tcg_temp_free_i64(dh); + tcg_temp_free_i64(zero); +} + +static DisasJumpType op_vavgl(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + static const GVecGen3 g[4] = { + { .fno = gen_helper_gvec_vavgl8, }, + { .fno = gen_helper_gvec_vavgl16, }, + { .fni4 = gen_avgl_i32, }, + { .fni8 = gen_avgl_i64, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + gen_gvec_3(get_field(s, v1), get_field(s, v2), + get_field(s, v3), &g[es]); + return DISAS_NEXT; +} + +static DisasJumpType op_vcksm(DisasContext *s, DisasOps *o) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + TCGv_i32 sum = tcg_temp_new_i32(); + int i; + + read_vec_element_i32(sum, get_field(s, v3), 1, ES_32); + for (i = 0; i < 4; i++) { + read_vec_element_i32(tmp, get_field(s, v2), i, ES_32); + tcg_gen_add2_i32(tmp, sum, sum, sum, tmp, tmp); + } + gen_gvec_dup_imm(ES_32, get_field(s, v1), 0); + write_vec_element_i32(sum, get_field(s, v1), 1, ES_32); + + tcg_temp_free_i32(tmp); + tcg_temp_free_i32(sum); + return DISAS_NEXT; +} + +static DisasJumpType op_vec(DisasContext *s, DisasOps *o) +{ + uint8_t es = get_field(s, m3); + const uint8_t enr = NUM_VEC_ELEMENTS(es) / 2 - 1; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + if (s->fields.op2 == 0xdb) { + es |= MO_SIGN; + } + + o->in1 = tcg_temp_new_i64(); + o->in2 = tcg_temp_new_i64(); + read_vec_element_i64(o->in1, get_field(s, v1), enr, es); + read_vec_element_i64(o->in2, get_field(s, v2), enr, es); + return DISAS_NEXT; +} + +static DisasJumpType op_vc(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + TCGCond cond = s->insn->data; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tcg_gen_gvec_cmp(cond, es, + vec_full_reg_offset(get_field(s, v1)), + vec_full_reg_offset(get_field(s, v2)), + vec_full_reg_offset(get_field(s, v3)), 16, 16); + if (get_field(s, m5) & 0x1) { + TCGv_i64 low = tcg_temp_new_i64(); + TCGv_i64 high = tcg_temp_new_i64(); + + read_vec_element_i64(high, get_field(s, v1), 0, ES_64); + read_vec_element_i64(low, get_field(s, v1), 1, ES_64); + gen_op_update2_cc_i64(s, CC_OP_VC, low, high); + + tcg_temp_free_i64(low); + tcg_temp_free_i64(high); + } + return DISAS_NEXT; +} + +static void gen_clz_i32(TCGv_i32 d, TCGv_i32 a) +{ + tcg_gen_clzi_i32(d, a, 32); +} + +static void gen_clz_i64(TCGv_i64 d, TCGv_i64 a) +{ + tcg_gen_clzi_i64(d, a, 64); +} + +static DisasJumpType op_vclz(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + static const GVecGen2 g[4] = { + { .fno = gen_helper_gvec_vclz8, }, + { .fno = gen_helper_gvec_vclz16, }, + { .fni4 = gen_clz_i32, }, + { .fni8 = gen_clz_i64, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]); + return DISAS_NEXT; +} + +static void gen_ctz_i32(TCGv_i32 d, TCGv_i32 a) +{ + tcg_gen_ctzi_i32(d, a, 32); +} + +static void gen_ctz_i64(TCGv_i64 d, TCGv_i64 a) +{ + tcg_gen_ctzi_i64(d, a, 64); +} + +static DisasJumpType op_vctz(DisasContext *s, 
DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + static const GVecGen2 g[4] = { + { .fno = gen_helper_gvec_vctz8, }, + { .fno = gen_helper_gvec_vctz16, }, + { .fni4 = gen_ctz_i32, }, + { .fni8 = gen_ctz_i64, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]); + return DISAS_NEXT; +} + +static DisasJumpType op_vx(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(xor, ES_8, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vgfm(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + static const GVecGen3 g[4] = { + { .fno = gen_helper_gvec_vgfm8, }, + { .fno = gen_helper_gvec_vgfm16, }, + { .fno = gen_helper_gvec_vgfm32, }, + { .fno = gen_helper_gvec_vgfm64, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + gen_gvec_3(get_field(s, v1), get_field(s, v2), + get_field(s, v3), &g[es]); + return DISAS_NEXT; +} + +static DisasJumpType op_vgfma(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m5); + static const GVecGen4 g[4] = { + { .fno = gen_helper_gvec_vgfma8, }, + { .fno = gen_helper_gvec_vgfma16, }, + { .fno = gen_helper_gvec_vgfma32, }, + { .fno = gen_helper_gvec_vgfma64, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + gen_gvec_4(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), &g[es]); + return DISAS_NEXT; +} + +static DisasJumpType op_vlc(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_fn_2(neg, es, get_field(s, v1), get_field(s, v2)); + return DISAS_NEXT; +} + +static DisasJumpType op_vlp(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_fn_2(abs, es, get_field(s, v1), get_field(s, v2)); + return DISAS_NEXT; +} + +static DisasJumpType op_vmx(DisasContext *s, DisasOps *o) +{ + const uint8_t v1 = get_field(s, v1); + const uint8_t v2 = get_field(s, v2); + const uint8_t v3 = get_field(s, v3); + const uint8_t es = get_field(s, m4); + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (s->fields.op2) { + case 0xff: + gen_gvec_fn_3(smax, es, v1, v2, v3); + break; + case 0xfd: + gen_gvec_fn_3(umax, es, v1, v2, v3); + break; + case 0xfe: + gen_gvec_fn_3(smin, es, v1, v2, v3); + break; + case 0xfc: + gen_gvec_fn_3(umin, es, v1, v2, v3); + break; + default: + g_assert_not_reached(); + } + return DISAS_NEXT; +} + +static void gen_mal_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + + tcg_gen_mul_i32(t0, a, b); + tcg_gen_add_i32(d, t0, c); + + tcg_temp_free_i32(t0); +} + +static void gen_mah_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_ext_i32_i64(t0, a); + tcg_gen_ext_i32_i64(t1, b); + tcg_gen_ext_i32_i64(t2, c); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_add_i64(t0, t0, t2); + tcg_gen_extrh_i64_i32(d, t0); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); +} + +static void gen_malh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, TCGv_i32 c) +{ + TCGv_i64 t0 
= tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_extu_i32_i64(t0, a); + tcg_gen_extu_i32_i64(t1, b); + tcg_gen_extu_i32_i64(t2, c); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_add_i64(t0, t0, t2); + tcg_gen_extrh_i64_i32(d, t0); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); +} + +static DisasJumpType op_vma(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m5); + static const GVecGen4 g_vmal[3] = { + { .fno = gen_helper_gvec_vmal8, }, + { .fno = gen_helper_gvec_vmal16, }, + { .fni4 = gen_mal_i32, }, + }; + static const GVecGen4 g_vmah[3] = { + { .fno = gen_helper_gvec_vmah8, }, + { .fno = gen_helper_gvec_vmah16, }, + { .fni4 = gen_mah_i32, }, + }; + static const GVecGen4 g_vmalh[3] = { + { .fno = gen_helper_gvec_vmalh8, }, + { .fno = gen_helper_gvec_vmalh16, }, + { .fni4 = gen_malh_i32, }, + }; + static const GVecGen4 g_vmae[3] = { + { .fno = gen_helper_gvec_vmae8, }, + { .fno = gen_helper_gvec_vmae16, }, + { .fno = gen_helper_gvec_vmae32, }, + }; + static const GVecGen4 g_vmale[3] = { + { .fno = gen_helper_gvec_vmale8, }, + { .fno = gen_helper_gvec_vmale16, }, + { .fno = gen_helper_gvec_vmale32, }, + }; + static const GVecGen4 g_vmao[3] = { + { .fno = gen_helper_gvec_vmao8, }, + { .fno = gen_helper_gvec_vmao16, }, + { .fno = gen_helper_gvec_vmao32, }, + }; + static const GVecGen4 g_vmalo[3] = { + { .fno = gen_helper_gvec_vmalo8, }, + { .fno = gen_helper_gvec_vmalo16, }, + { .fno = gen_helper_gvec_vmalo32, }, + }; + const GVecGen4 *fn; + + if (es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (s->fields.op2) { + case 0xaa: + fn = &g_vmal[es]; + break; + case 0xab: + fn = &g_vmah[es]; + break; + case 0xa9: + fn = &g_vmalh[es]; + break; + case 0xae: + fn = &g_vmae[es]; + break; + case 0xac: + fn = &g_vmale[es]; + break; + case 0xaf: + fn = &g_vmao[es]; + break; + case 0xad: + fn = &g_vmalo[es]; + break; + default: + g_assert_not_reached(); + } + + gen_gvec_4(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), fn); + return DISAS_NEXT; +} + +static void gen_mh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_muls2_i32(t, d, a, b); + tcg_temp_free_i32(t); +} + +static void gen_mlh_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_mulu2_i32(t, d, a, b); + tcg_temp_free_i32(t); +} + +static DisasJumpType op_vm(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + static const GVecGen3 g_vmh[3] = { + { .fno = gen_helper_gvec_vmh8, }, + { .fno = gen_helper_gvec_vmh16, }, + { .fni4 = gen_mh_i32, }, + }; + static const GVecGen3 g_vmlh[3] = { + { .fno = gen_helper_gvec_vmlh8, }, + { .fno = gen_helper_gvec_vmlh16, }, + { .fni4 = gen_mlh_i32, }, + }; + static const GVecGen3 g_vme[3] = { + { .fno = gen_helper_gvec_vme8, }, + { .fno = gen_helper_gvec_vme16, }, + { .fno = gen_helper_gvec_vme32, }, + }; + static const GVecGen3 g_vmle[3] = { + { .fno = gen_helper_gvec_vmle8, }, + { .fno = gen_helper_gvec_vmle16, }, + { .fno = gen_helper_gvec_vmle32, }, + }; + static const GVecGen3 g_vmo[3] = { + { .fno = gen_helper_gvec_vmo8, }, + { .fno = gen_helper_gvec_vmo16, }, + { .fno = gen_helper_gvec_vmo32, }, + }; + static const GVecGen3 g_vmlo[3] = { + { .fno = gen_helper_gvec_vmlo8, }, + { .fno = gen_helper_gvec_vmlo16, }, + { .fno = gen_helper_gvec_vmlo32, }, + }; + const GVecGen3 *fn; + + if (es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); 
+ return DISAS_NORETURN; + } + + switch (s->fields.op2) { + case 0xa2: + gen_gvec_fn_3(mul, es, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; + case 0xa3: + fn = &g_vmh[es]; + break; + case 0xa1: + fn = &g_vmlh[es]; + break; + case 0xa6: + fn = &g_vme[es]; + break; + case 0xa4: + fn = &g_vmle[es]; + break; + case 0xa7: + fn = &g_vmo[es]; + break; + case 0xa5: + fn = &g_vmlo[es]; + break; + default: + g_assert_not_reached(); + } + + gen_gvec_3(get_field(s, v1), get_field(s, v2), + get_field(s, v3), fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vmsl(DisasContext *s, DisasOps *o) +{ + TCGv_i64 l1, h1, l2, h2; + + if (get_field(s, m5) != ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + l1 = tcg_temp_new_i64(); + h1 = tcg_temp_new_i64(); + l2 = tcg_temp_new_i64(); + h2 = tcg_temp_new_i64(); + + /* Multiply both even elements from v2 and v3 */ + read_vec_element_i64(l1, get_field(s, v2), 0, ES_64); + read_vec_element_i64(h1, get_field(s, v3), 0, ES_64); + tcg_gen_mulu2_i64(l1, h1, l1, h1); + /* Shift result left by one (x2) if requested */ + if (extract32(get_field(s, m6), 3, 1)) { + tcg_gen_add2_i64(l1, h1, l1, h1, l1, h1); + } + + /* Multiply both odd elements from v2 and v3 */ + read_vec_element_i64(l2, get_field(s, v2), 1, ES_64); + read_vec_element_i64(h2, get_field(s, v3), 1, ES_64); + tcg_gen_mulu2_i64(l2, h2, l2, h2); + /* Shift result left by one (x2) if requested */ + if (extract32(get_field(s, m6), 2, 1)) { + tcg_gen_add2_i64(l2, h2, l2, h2, l2, h2); + } + + /* Add both intermediate results */ + tcg_gen_add2_i64(l1, h1, l1, h1, l2, h2); + /* Add whole v4 */ + read_vec_element_i64(h2, get_field(s, v4), 0, ES_64); + read_vec_element_i64(l2, get_field(s, v4), 1, ES_64); + tcg_gen_add2_i64(l1, h1, l1, h1, l2, h2); + + /* Store final result into v1. 
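+ * As a single 128 bit quantity, h1:l1 now holds the sum of both
+ * (optionally doubled) products and v4.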
*/ + write_vec_element_i64(h1, get_field(s, v1), 0, ES_64); + write_vec_element_i64(l1, get_field(s, v1), 1, ES_64); + + tcg_temp_free_i64(l1); + tcg_temp_free_i64(h1); + tcg_temp_free_i64(l2); + tcg_temp_free_i64(h2); + return DISAS_NEXT; +} + +static DisasJumpType op_vnn(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(nand, ES_8, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vno(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(nor, ES_8, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vnx(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(eqv, ES_8, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vo(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(or, ES_8, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_voc(DisasContext *s, DisasOps *o) +{ + gen_gvec_fn_3(orc, ES_8, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static DisasJumpType op_vpopct(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + static const GVecGen2 g[4] = { + { .fno = gen_helper_gvec_vpopct8, }, + { .fno = gen_helper_gvec_vpopct16, }, + { .fni4 = tcg_gen_ctpop_i32, }, + { .fni8 = tcg_gen_ctpop_i64, }, + }; + + if (es > ES_64 || (es != ES_8 && !s390_has_feat(S390_FEAT_VECTOR_ENH))) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_2(get_field(s, v1), get_field(s, v2), &g[es]); + return DISAS_NEXT; +} + +static void gen_rim_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b, int32_t c) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_rotli_i32(t, a, c & 31); + tcg_gen_and_i32(t, t, b); + tcg_gen_andc_i32(d, d, b); + tcg_gen_or_i32(d, d, t); + + tcg_temp_free_i32(t); +} + +static void gen_rim_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, int64_t c) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_rotli_i64(t, a, c & 63); + tcg_gen_and_i64(t, t, b); + tcg_gen_andc_i64(d, d, b); + tcg_gen_or_i64(d, d, t); + + tcg_temp_free_i64(t); +} + +static DisasJumpType op_verim(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m5); + const uint8_t i4 = get_field(s, i4) & + (NUM_VEC_ELEMENT_BITS(es) - 1); + static const GVecGen3i g[4] = { + { .fno = gen_helper_gvec_verim8, }, + { .fno = gen_helper_gvec_verim16, }, + { .fni4 = gen_rim_i32, + .load_dest = true, }, + { .fni8 = gen_rim_i64, + .load_dest = true, }, + }; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_3i(get_field(s, v1), get_field(s, v2), + get_field(s, v3), i4, &g[es]); + return DISAS_NEXT; +} + +static DisasJumpType op_vesv(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t v1 = get_field(s, v1); + const uint8_t v2 = get_field(s, v2); + const uint8_t v3 = get_field(s, v3); + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (s->fields.op2) { + case 0x70: + gen_gvec_fn_3(shlv, es, v1, v2, v3); + break; + case 0x73: + gen_gvec_fn_3(rotlv, es, v1, v2, v3); + break; + case 0x7a: + gen_gvec_fn_3(sarv, es, v1, v2, v3); + break; + case 0x78: + gen_gvec_fn_3(shrv, es, v1, v2, v3); + break; + default: + g_assert_not_reached(); + } + return DISAS_NEXT; +} + +static DisasJumpType op_ves(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t d2 = get_field(s, d2) 
& + (NUM_VEC_ELEMENT_BITS(es) - 1); + const uint8_t v1 = get_field(s, v1); + const uint8_t v3 = get_field(s, v3); + TCGv_i32 shift; + + if (es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (likely(!get_field(s, b2))) { + switch (s->fields.op2) { + case 0x30: + gen_gvec_fn_2i(shli, es, v1, v3, d2); + break; + case 0x33: + gen_gvec_fn_2i(rotli, es, v1, v3, d2); + break; + case 0x3a: + gen_gvec_fn_2i(sari, es, v1, v3, d2); + break; + case 0x38: + gen_gvec_fn_2i(shri, es, v1, v3, d2); + break; + default: + g_assert_not_reached(); + } + } else { + shift = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(shift, o->addr1); + tcg_gen_andi_i32(shift, shift, NUM_VEC_ELEMENT_BITS(es) - 1); + switch (s->fields.op2) { + case 0x30: + gen_gvec_fn_2s(shls, es, v1, v3, shift); + break; + case 0x33: + gen_gvec_fn_2s(rotls, es, v1, v3, shift); + break; + case 0x3a: + gen_gvec_fn_2s(sars, es, v1, v3, shift); + break; + case 0x38: + gen_gvec_fn_2s(shrs, es, v1, v3, shift); + break; + default: + g_assert_not_reached(); + } + tcg_temp_free_i32(shift); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vsl(DisasContext *s, DisasOps *o) +{ + TCGv_i64 shift = tcg_temp_new_i64(); + + read_vec_element_i64(shift, get_field(s, v3), 7, ES_8); + if (s->fields.op2 == 0x74) { + tcg_gen_andi_i64(shift, shift, 0x7); + } else { + tcg_gen_andi_i64(shift, shift, 0x78); + } + + gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), + shift, 0, gen_helper_gvec_vsl); + tcg_temp_free_i64(shift); + return DISAS_NEXT; +} + +static DisasJumpType op_vsldb(DisasContext *s, DisasOps *o) +{ + const uint8_t i4 = get_field(s, i4) & 0xf; + const int left_shift = (i4 & 7) * 8; + const int right_shift = 64 - left_shift; + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + if ((i4 & 8) == 0) { + read_vec_element_i64(t0, get_field(s, v2), 0, ES_64); + read_vec_element_i64(t1, get_field(s, v2), 1, ES_64); + read_vec_element_i64(t2, get_field(s, v3), 0, ES_64); + } else { + read_vec_element_i64(t0, get_field(s, v2), 1, ES_64); + read_vec_element_i64(t1, get_field(s, v3), 0, ES_64); + read_vec_element_i64(t2, get_field(s, v3), 1, ES_64); + } + tcg_gen_extract2_i64(t0, t1, t0, right_shift); + tcg_gen_extract2_i64(t1, t2, t1, right_shift); + write_vec_element_i64(t0, get_field(s, v1), 0, ES_64); + write_vec_element_i64(t1, get_field(s, v1), 1, ES_64); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); + return DISAS_NEXT; +} + +static DisasJumpType op_vsra(DisasContext *s, DisasOps *o) +{ + TCGv_i64 shift = tcg_temp_new_i64(); + + read_vec_element_i64(shift, get_field(s, v3), 7, ES_8); + if (s->fields.op2 == 0x7e) { + tcg_gen_andi_i64(shift, shift, 0x7); + } else { + tcg_gen_andi_i64(shift, shift, 0x78); + } + + gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), + shift, 0, gen_helper_gvec_vsra); + tcg_temp_free_i64(shift); + return DISAS_NEXT; +} + +static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o) +{ + TCGv_i64 shift = tcg_temp_new_i64(); + + read_vec_element_i64(shift, get_field(s, v3), 7, ES_8); + if (s->fields.op2 == 0x7c) { + tcg_gen_andi_i64(shift, shift, 0x7); + } else { + tcg_gen_andi_i64(shift, shift, 0x78); + } + + gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), + shift, 0, gen_helper_gvec_vsrl); + tcg_temp_free_i64(shift); + return DISAS_NEXT; +} + +static DisasJumpType op_vs(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + + if (es > ES_128) { + gen_program_exception(s, 
PGM_SPECIFICATION); + return DISAS_NORETURN; + } else if (es == ES_128) { + gen_gvec128_3_i64(tcg_gen_sub2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; + } + gen_gvec_fn_3(sub, es, get_field(s, v1), get_field(s, v2), + get_field(s, v3)); + return DISAS_NEXT; +} + +static void gen_scbi_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_setcond_i32(TCG_COND_GEU, d, a, b); +} + +static void gen_scbi_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_setcond_i64(TCG_COND_GEU, d, a, b); +} + +static void gen_scbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, + TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh) +{ + TCGv_i64 th = tcg_temp_new_i64(); + TCGv_i64 tl = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_const_i64(0); + + tcg_gen_sub2_i64(tl, th, al, zero, bl, zero); + tcg_gen_andi_i64(th, th, 1); + tcg_gen_sub2_i64(tl, th, ah, zero, th, zero); + tcg_gen_sub2_i64(tl, th, tl, th, bh, zero); + /* "invert" the result: -1 -> 0; 0 -> 1 */ + tcg_gen_addi_i64(dl, th, 1); + tcg_gen_mov_i64(dh, zero); + + tcg_temp_free_i64(th); + tcg_temp_free_i64(tl); + tcg_temp_free_i64(zero); +} + +static DisasJumpType op_vscbi(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + static const GVecGen3 g[4] = { + { .fno = gen_helper_gvec_vscbi8, }, + { .fno = gen_helper_gvec_vscbi16, }, + { .fni4 = gen_scbi_i32, }, + { .fni8 = gen_scbi_i64, }, + }; + + if (es > ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } else if (es == ES_128) { + gen_gvec128_3_i64(gen_scbi2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3)); + return DISAS_NEXT; + } + gen_gvec_3(get_field(s, v1), get_field(s, v2), + get_field(s, v3), &g[es]); + return DISAS_NEXT; +} + +static void gen_sbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, + TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch) +{ + TCGv_i64 tl = tcg_temp_new_i64(); + TCGv_i64 th = tcg_temp_new_i64(); + + tcg_gen_not_i64(tl, bl); + tcg_gen_not_i64(th, bh); + gen_ac2_i64(dl, dh, al, ah, tl, th, cl, ch); + tcg_temp_free_i64(tl); + tcg_temp_free_i64(th); +} + +static DisasJumpType op_vsbi(DisasContext *s, DisasOps *o) +{ + if (get_field(s, m5) != ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec128_4_i64(gen_sbi2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3), + get_field(s, v4)); + return DISAS_NEXT; +} + +static void gen_sbcbi2_i64(TCGv_i64 dl, TCGv_i64 dh, TCGv_i64 al, TCGv_i64 ah, + TCGv_i64 bl, TCGv_i64 bh, TCGv_i64 cl, TCGv_i64 ch) +{ + TCGv_i64 th = tcg_temp_new_i64(); + TCGv_i64 tl = tcg_temp_new_i64(); + + tcg_gen_not_i64(tl, bl); + tcg_gen_not_i64(th, bh); + gen_accc2_i64(dl, dh, al, ah, tl, th, cl, ch); + + tcg_temp_free_i64(tl); + tcg_temp_free_i64(th); +} + +static DisasJumpType op_vsbcbi(DisasContext *s, DisasOps *o) +{ + if (get_field(s, m5) != ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec128_4_i64(gen_sbcbi2_i64, get_field(s, v1), + get_field(s, v2), get_field(s, v3), + get_field(s, v4)); + return DISAS_NEXT; +} + +static DisasJumpType op_vsumg(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + TCGv_i64 sum, tmp; + uint8_t dst_idx; + + if (es == ES_8 || es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + sum = tcg_temp_new_i64(); + tmp = tcg_temp_new_i64(); + for (dst_idx = 0; dst_idx < 2; dst_idx++) { + uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 2; + const uint8_t max_idx = idx + 
NUM_VEC_ELEMENTS(es) / 2 - 1; + + read_vec_element_i64(sum, get_field(s, v3), max_idx, es); + for (; idx <= max_idx; idx++) { + read_vec_element_i64(tmp, get_field(s, v2), idx, es); + tcg_gen_add_i64(sum, sum, tmp); + } + write_vec_element_i64(sum, get_field(s, v1), dst_idx, ES_64); + } + tcg_temp_free_i64(sum); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vsumq(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t max_idx = NUM_VEC_ELEMENTS(es) - 1; + TCGv_i64 sumh, suml, zero, tmpl; + uint8_t idx; + + if (es < ES_32 || es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + sumh = tcg_const_i64(0); + suml = tcg_temp_new_i64(); + zero = tcg_const_i64(0); + tmpl = tcg_temp_new_i64(); + + read_vec_element_i64(suml, get_field(s, v3), max_idx, es); + for (idx = 0; idx <= max_idx; idx++) { + read_vec_element_i64(tmpl, get_field(s, v2), idx, es); + tcg_gen_add2_i64(suml, sumh, suml, sumh, tmpl, zero); + } + write_vec_element_i64(sumh, get_field(s, v1), 0, ES_64); + write_vec_element_i64(suml, get_field(s, v1), 1, ES_64); + + tcg_temp_free_i64(sumh); + tcg_temp_free_i64(suml); + tcg_temp_free_i64(zero); + tcg_temp_free_i64(tmpl); + return DISAS_NEXT; +} + +static DisasJumpType op_vsum(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + TCGv_i32 sum, tmp; + uint8_t dst_idx; + + if (es > ES_16) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + sum = tcg_temp_new_i32(); + tmp = tcg_temp_new_i32(); + for (dst_idx = 0; dst_idx < 4; dst_idx++) { + uint8_t idx = dst_idx * NUM_VEC_ELEMENTS(es) / 4; + const uint8_t max_idx = idx + NUM_VEC_ELEMENTS(es) / 4 - 1; + + read_vec_element_i32(sum, get_field(s, v3), max_idx, es); + for (; idx <= max_idx; idx++) { + read_vec_element_i32(tmp, get_field(s, v2), idx, es); + tcg_gen_add_i32(sum, sum, tmp); + } + write_vec_element_i32(sum, get_field(s, v1), dst_idx, ES_32); + } + tcg_temp_free_i32(sum); + tcg_temp_free_i32(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vtm(DisasContext *s, DisasOps *o) +{ + gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), + cpu_env, 0, gen_helper_gvec_vtm); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_vfae(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + static gen_helper_gvec_3 * const g[3] = { + gen_helper_gvec_vfae8, + gen_helper_gvec_vfae16, + gen_helper_gvec_vfae32, + }; + static gen_helper_gvec_3_ptr * const g_cc[3] = { + gen_helper_gvec_vfae_cc8, + gen_helper_gvec_vfae_cc16, + gen_helper_gvec_vfae_cc32, + }; + if (es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), cpu_env, m5, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), m5, g[es]); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vfee(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + static gen_helper_gvec_3 * const g[3] = { + gen_helper_gvec_vfee8, + gen_helper_gvec_vfee16, + gen_helper_gvec_vfee32, + }; + static gen_helper_gvec_3_ptr * const g_cc[3] = { + gen_helper_gvec_vfee_cc8, + gen_helper_gvec_vfee_cc16, + gen_helper_gvec_vfee_cc32, + }; + + if (es > ES_32 || m5 & ~0x3) { + gen_program_exception(s, PGM_SPECIFICATION); + return 
DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), cpu_env, m5, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), m5, g[es]); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vfene(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + static gen_helper_gvec_3 * const g[3] = { + gen_helper_gvec_vfene8, + gen_helper_gvec_vfene16, + gen_helper_gvec_vfene32, + }; + static gen_helper_gvec_3_ptr * const g_cc[3] = { + gen_helper_gvec_vfene_cc8, + gen_helper_gvec_vfene_cc16, + gen_helper_gvec_vfene_cc32, + }; + + if (es > ES_32 || m5 & ~0x3) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), cpu_env, m5, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), m5, g[es]); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vistr(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + static gen_helper_gvec_2 * const g[3] = { + gen_helper_gvec_vistr8, + gen_helper_gvec_vistr16, + gen_helper_gvec_vistr32, + }; + static gen_helper_gvec_2_ptr * const g_cc[3] = { + gen_helper_gvec_vistr_cc8, + gen_helper_gvec_vistr_cc16, + gen_helper_gvec_vistr_cc32, + }; + + if (es > ES_32 || m5 & ~0x1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), + cpu_env, 0, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_2_ool(get_field(s, v1), get_field(s, v2), 0, + g[es]); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m5); + const uint8_t m6 = get_field(s, m6); + static gen_helper_gvec_4 * const g[3] = { + gen_helper_gvec_vstrc8, + gen_helper_gvec_vstrc16, + gen_helper_gvec_vstrc32, + }; + static gen_helper_gvec_4 * const g_rt[3] = { + gen_helper_gvec_vstrc_rt8, + gen_helper_gvec_vstrc_rt16, + gen_helper_gvec_vstrc_rt32, + }; + static gen_helper_gvec_4_ptr * const g_cc[3] = { + gen_helper_gvec_vstrc_cc8, + gen_helper_gvec_vstrc_cc16, + gen_helper_gvec_vstrc_cc32, + }; + static gen_helper_gvec_4_ptr * const g_cc_rt[3] = { + gen_helper_gvec_vstrc_cc_rt8, + gen_helper_gvec_vstrc_cc_rt16, + gen_helper_gvec_vstrc_cc_rt32, + }; + + if (es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m6, 0, 1)) { + if (extract32(m6, 2, 1)) { + gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), + cpu_env, m6, g_cc_rt[es]); + } else { + gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), + cpu_env, m6, g_cc[es]); + } + set_cc_static(s); + } else { + if (extract32(m6, 2, 1)) { + gen_gvec_4_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), + m6, g_rt[es]); + } else { + gen_gvec_4_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), + m6, g[es]); + } + } + return DISAS_NEXT; +} + +static DisasJumpType op_vfa(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + gen_helper_gvec_3_ptr *fn = NULL; + + switch (s->fields.op2) { + case 0xe3: + switch (fpf) { + case FPF_SHORT: 
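+        /* short and extended BFP formats require the vector-enhancements facility */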
+ if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfa32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfa64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfa128; + } + break; + default: + break; + } + break; + case 0xe5: + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfd32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfd64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfd128; + } + break; + default: + break; + } + break; + case 0xe7: + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfm32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfm64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfm128; + } + break; + default: + break; + } + break; + case 0xe2: + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfs32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfs64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfs128; + } + break; + default: + break; + } + break; + default: + g_assert_not_reached(); + } + + if (!fn || extract32(m5, 0, 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), cpu_env, m5, fn); + return DISAS_NEXT; +} + +static DisasJumpType op_wfc(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s, m3); + const uint8_t m4 = get_field(s, m4); + gen_helper_gvec_2_ptr *fn = NULL; + + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_wfk32; + if (s->fields.op2 == 0xcb) { + fn = gen_helper_gvec_wfc32; + } + } + break; + case FPF_LONG: + fn = gen_helper_gvec_wfk64; + if (s->fields.op2 == 0xcb) { + fn = gen_helper_gvec_wfc64; + } + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_wfk128; + if (s->fields.op2 == 0xcb) { + fn = gen_helper_gvec_wfc128; + } + } + break; + default: + break; + }; + + if (!fn || m4) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, 0, fn); + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_vfc(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + const uint8_t m6 = get_field(s, m6); + const bool cs = extract32(m6, 0, 1); + const bool sq = extract32(m5, 2, 1); + gen_helper_gvec_3_ptr *fn = NULL; + + switch (s->fields.op2) { + case 0xe8: + switch (fpf) { + case FPF_SHORT: + fn = cs ? gen_helper_gvec_vfce32_cc : gen_helper_gvec_vfce32; + break; + case FPF_LONG: + fn = cs ? gen_helper_gvec_vfce64_cc : gen_helper_gvec_vfce64; + break; + case FPF_EXT: + fn = cs ? gen_helper_gvec_vfce128_cc : gen_helper_gvec_vfce128; + break; + default: + break; + } + break; + case 0xeb: + switch (fpf) { + case FPF_SHORT: + fn = cs ? gen_helper_gvec_vfch32_cc : gen_helper_gvec_vfch32; + break; + case FPF_LONG: + fn = cs ? gen_helper_gvec_vfch64_cc : gen_helper_gvec_vfch64; + break; + case FPF_EXT: + fn = cs ? gen_helper_gvec_vfch128_cc : gen_helper_gvec_vfch128; + break; + default: + break; + } + break; + case 0xea: + switch (fpf) { + case FPF_SHORT: + fn = cs ? 
gen_helper_gvec_vfche32_cc : gen_helper_gvec_vfche32; + break; + case FPF_LONG: + fn = cs ? gen_helper_gvec_vfche64_cc : gen_helper_gvec_vfche64; + break; + case FPF_EXT: + fn = cs ? gen_helper_gvec_vfche128_cc : gen_helper_gvec_vfche128; + break; + default: + break; + } + break; + default: + g_assert_not_reached(); + } + + if (!fn || extract32(m5, 0, 2) || extract32(m6, 1, 3) || + (!s390_has_feat(S390_FEAT_VECTOR_ENH) && (fpf != FPF_LONG || sq))) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3), + cpu_env, m5, fn); + if (cs) { + set_cc_static(s); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s, m3); + const uint8_t m4 = get_field(s, m4); + const uint8_t erm = get_field(s, m5); + gen_helper_gvec_2_ptr *fn = NULL; + + + switch (s->fields.op2) { + case 0xc3: + if (fpf == FPF_LONG) { + fn = gen_helper_gvec_vcdg64; + } + break; + case 0xc1: + if (fpf == FPF_LONG) { + fn = gen_helper_gvec_vcdlg64; + } + break; + case 0xc2: + if (fpf == FPF_LONG) { + fn = gen_helper_gvec_vcgd64; + } + break; + case 0xc0: + if (fpf == FPF_LONG) { + fn = gen_helper_gvec_vclgd64; + } + break; + case 0xc7: + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfi32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfi64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfi128; + } + break; + default: + break; + } + break; + case 0xc5: + switch (fpf) { + case FPF_LONG: + fn = gen_helper_gvec_vflr64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vflr128; + } + break; + default: + break; + } + break; + default: + g_assert_not_reached(); + } + + if (!fn || extract32(m4, 0, 2) || erm > 7 || erm == 2) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, + deposit32(m4, 4, 4, erm), fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vfll(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s, m3); + const uint8_t m4 = get_field(s, m4); + gen_helper_gvec_2_ptr *fn = NULL; + + switch (fpf) { + case FPF_SHORT: + fn = gen_helper_gvec_vfll32; + break; + case FPF_LONG: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfll64; + } + break; + default: + break; + } + + if (!fn || extract32(m4, 0, 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, m4, fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vfmax(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s, m4); + const uint8_t m6 = get_field(s, m6); + const uint8_t m5 = get_field(s, m5); + gen_helper_gvec_3_ptr *fn; + + if (m6 == 5 || m6 == 6 || m6 == 7 || m6 > 13) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (fpf) { + case FPF_SHORT: + if (s->fields.op2 == 0xef) { + fn = gen_helper_gvec_vfmax32; + } else { + fn = gen_helper_gvec_vfmin32; + } + break; + case FPF_LONG: + if (s->fields.op2 == 0xef) { + fn = gen_helper_gvec_vfmax64; + } else { + fn = gen_helper_gvec_vfmin64; + } + break; + case FPF_EXT: + if (s->fields.op2 == 0xef) { + fn = gen_helper_gvec_vfmax128; + } else { + fn = gen_helper_gvec_vfmin128; + } + break; + default: + gen_program_exception(s, PGM_SPECIFICATION); + return 
DISAS_NORETURN; + } + + gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2), get_field(s, v3), + cpu_env, deposit32(m5, 4, 4, m6), fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vfma(DisasContext *s, DisasOps *o) +{ + const uint8_t m5 = get_field(s, m5); + const uint8_t fpf = get_field(s, m6); + gen_helper_gvec_4_ptr *fn = NULL; + + switch (s->fields.op2) { + case 0x8f: + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfma32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfma64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfma128; + } + break; + default: + break; + } + break; + case 0x8e: + switch (fpf) { + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfms32; + } + break; + case FPF_LONG: + fn = gen_helper_gvec_vfms64; + break; + case FPF_EXT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH)) { + fn = gen_helper_gvec_vfms128; + } + break; + default: + break; + } + break; + case 0x9f: + switch (fpf) { + case FPF_SHORT: + fn = gen_helper_gvec_vfnma32; + break; + case FPF_LONG: + fn = gen_helper_gvec_vfnma64; + break; + case FPF_EXT: + fn = gen_helper_gvec_vfnma128; + break; + default: + break; + } + break; + case 0x9e: + switch (fpf) { + case FPF_SHORT: + fn = gen_helper_gvec_vfnms32; + break; + case FPF_LONG: + fn = gen_helper_gvec_vfnms64; + break; + case FPF_EXT: + fn = gen_helper_gvec_vfnms128; + break; + default: + break; + } + break; + default: + g_assert_not_reached(); + } + + if (!fn || extract32(m5, 0, 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), cpu_env, m5, fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vfpso(DisasContext *s, DisasOps *o) +{ + const uint8_t v1 = get_field(s, v1); + const uint8_t v2 = get_field(s, v2); + const uint8_t fpf = get_field(s, m3); + const uint8_t m4 = get_field(s, m4); + const uint8_t m5 = get_field(s, m5); + const bool se = extract32(m4, 3, 1); + TCGv_i64 tmp; + + if ((fpf != FPF_LONG && !s390_has_feat(S390_FEAT_VECTOR_ENH)) || + extract32(m4, 0, 3) || m5 > 2) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (fpf) { + case FPF_SHORT: + if (!se) { + switch (m5) { + case 0: + /* sign bit is inverted (complement) */ + gen_gvec_fn_2i(xori, ES_32, v1, v2, 1ull << 31); + break; + case 1: + /* sign bit is set to one (negative) */ + gen_gvec_fn_2i(ori, ES_32, v1, v2, 1ull << 31); + break; + case 2: + /* sign bit is set to zero (positive) */ + gen_gvec_fn_2i(andi, ES_32, v1, v2, (1ull << 31) - 1); + break; + } + return DISAS_NEXT; + } + break; + case FPF_LONG: + if (!se) { + switch (m5) { + case 0: + /* sign bit is inverted (complement) */ + gen_gvec_fn_2i(xori, ES_64, v1, v2, 1ull << 63); + break; + case 1: + /* sign bit is set to one (negative) */ + gen_gvec_fn_2i(ori, ES_64, v1, v2, 1ull << 63); + break; + case 2: + /* sign bit is set to zero (positive) */ + gen_gvec_fn_2i(andi, ES_64, v1, v2, (1ull << 63) - 1); + break; + } + return DISAS_NEXT; + } + break; + case FPF_EXT: + /* Only a single element. */ + break; + default: + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* With a single element, we are only interested in bit 0. 
*/
+    tmp = tcg_temp_new_i64();
+    read_vec_element_i64(tmp, v2, 0, ES_64);
+    switch (m5) {
+    case 0:
+        /* sign bit is inverted (complement) */
+        tcg_gen_xori_i64(tmp, tmp, 1ull << 63);
+        break;
+    case 1:
+        /* sign bit is set to one (negative) */
+        tcg_gen_ori_i64(tmp, tmp, 1ull << 63);
+        break;
+    case 2:
+        /* sign bit is set to zero (positive) */
+        tcg_gen_andi_i64(tmp, tmp, (1ull << 63) - 1);
+        break;
+    }
+    write_vec_element_i64(tmp, v1, 0, ES_64);
+
+    if (fpf == FPF_EXT) {
+        read_vec_element_i64(tmp, v2, 1, ES_64);
+        write_vec_element_i64(tmp, v1, 1, ES_64);
+    }
+
+    tcg_temp_free_i64(tmp);
+
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vfsq(DisasContext *s, DisasOps *o)
+{
+    const uint8_t fpf = get_field(s, m3);
+    const uint8_t m4 = get_field(s, m4);
+    gen_helper_gvec_2_ptr *fn = NULL;
+
+    switch (fpf) {
+    case FPF_SHORT:
+        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+            fn = gen_helper_gvec_vfsq32;
+        }
+        break;
+    case FPF_LONG:
+        fn = gen_helper_gvec_vfsq64;
+        break;
+    case FPF_EXT:
+        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+            fn = gen_helper_gvec_vfsq128;
+        }
+        break;
+    default:
+        break;
+    }
+
+    if (!fn || extract32(m4, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env, m4, fn);
+    return DISAS_NEXT;
+}
+
+static DisasJumpType op_vftci(DisasContext *s, DisasOps *o)
+{
+    const uint16_t i3 = get_field(s, i3);
+    const uint8_t fpf = get_field(s, m4);
+    const uint8_t m5 = get_field(s, m5);
+    gen_helper_gvec_2_ptr *fn = NULL;
+
+    switch (fpf) {
+    case FPF_SHORT:
+        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+            fn = gen_helper_gvec_vftci32;
+        }
+        break;
+    case FPF_LONG:
+        fn = gen_helper_gvec_vftci64;
+        break;
+    case FPF_EXT:
+        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+            fn = gen_helper_gvec_vftci128;
+        }
+        break;
+    default:
+        break;
+    }
+
+    if (!fn || extract32(m5, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    gen_gvec_2_ptr(get_field(s, v1), get_field(s, v2), cpu_env,
+                   deposit32(m5, 4, 12, i3), fn);
+    set_cc_static(s);
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/tcg/vec.h b/target/s390x/tcg/vec.h
new file mode 100644
index 0000000000..a6e361869b
--- /dev/null
+++ b/target/s390x/tcg/vec.h
@@ -0,0 +1,141 @@
+/*
+ * QEMU TCG support -- s390x vector utilities
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef S390X_VEC_H
+#define S390X_VEC_H
+
+#include "tcg/tcg.h"
+
+typedef union S390Vector {
+    uint64_t doubleword[2];
+    uint32_t word[4];
+    uint16_t halfword[8];
+    uint8_t byte[16];
+} S390Vector;
+
+/*
+ * Each vector is stored as two 64bit host values. So when talking about
+ * byte/halfword/word numbers, we have to take care of proper translation
+ * between element numbers.
+ * + * Big Endian (target/possible host) + * B: [ 0][ 1][ 2][ 3][ 4][ 5][ 6][ 7] - [ 8][ 9][10][11][12][13][14][15] + * HW: [ 0][ 1][ 2][ 3] - [ 4][ 5][ 6][ 7] + * W: [ 0][ 1] - [ 2][ 3] + * DW: [ 0] - [ 1] + * + * Little Endian (possible host) + * B: [ 7][ 6][ 5][ 4][ 3][ 2][ 1][ 0] - [15][14][13][12][11][10][ 9][ 8] + * HW: [ 3][ 2][ 1][ 0] - [ 7][ 6][ 5][ 4] + * W: [ 1][ 0] - [ 3][ 2] + * DW: [ 0] - [ 1] + */ +#ifndef HOST_WORDS_BIGENDIAN +#define H1(x) ((x) ^ 7) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) +#else +#define H1(x) (x) +#define H2(x) (x) +#define H4(x) (x) +#endif + +static inline uint8_t s390_vec_read_element8(const S390Vector *v, uint8_t enr) +{ + g_assert(enr < 16); + return v->byte[H1(enr)]; +} + +static inline uint16_t s390_vec_read_element16(const S390Vector *v, uint8_t enr) +{ + g_assert(enr < 8); + return v->halfword[H2(enr)]; +} + +static inline uint32_t s390_vec_read_element32(const S390Vector *v, uint8_t enr) +{ + g_assert(enr < 4); + return v->word[H4(enr)]; +} + +static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr) +{ + g_assert(enr < 2); + return v->doubleword[enr]; +} + +static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr, + uint8_t es) +{ + switch (es) { + case MO_8: + return s390_vec_read_element8(v, enr); + case MO_16: + return s390_vec_read_element16(v, enr); + case MO_32: + return s390_vec_read_element32(v, enr); + case MO_64: + return s390_vec_read_element64(v, enr); + default: + g_assert_not_reached(); + } +} + +static inline void s390_vec_write_element8(S390Vector *v, uint8_t enr, + uint8_t data) +{ + g_assert(enr < 16); + v->byte[H1(enr)] = data; +} + +static inline void s390_vec_write_element16(S390Vector *v, uint8_t enr, + uint16_t data) +{ + g_assert(enr < 8); + v->halfword[H2(enr)] = data; +} + +static inline void s390_vec_write_element32(S390Vector *v, uint8_t enr, + uint32_t data) +{ + g_assert(enr < 4); + v->word[H4(enr)] = data; +} + +static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr, + uint64_t data) +{ + g_assert(enr < 2); + v->doubleword[enr] = data; +} + +static inline void s390_vec_write_element(S390Vector *v, uint8_t enr, + uint8_t es, uint64_t data) +{ + switch (es) { + case MO_8: + s390_vec_write_element8(v, enr, data); + break; + case MO_16: + s390_vec_write_element16(v, enr, data); + break; + case MO_32: + s390_vec_write_element32(v, enr, data); + break; + case MO_64: + s390_vec_write_element64(v, enr, data); + break; + default: + g_assert_not_reached(); + } +} + +#endif /* S390X_VEC_H */ diff --git a/target/s390x/tcg/vec_fpu_helper.c b/target/s390x/tcg/vec_fpu_helper.c new file mode 100644 index 0000000000..1a77993471 --- /dev/null +++ b/target/s390x/tcg/vec_fpu_helper.c @@ -0,0 +1,1072 @@ +/* + * QEMU TCG support -- s390x vector floating point instruction support + * + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "vec.h" +#include "tcg_s390x.h" +#include "tcg/tcg-gvec-desc.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "fpu/softfloat.h" + +#define VIC_INVALID 0x1 +#define VIC_DIVBYZERO 0x2 +#define VIC_OVERFLOW 0x3 +#define VIC_UNDERFLOW 0x4 +#define VIC_INEXACT 0x5 + +/* returns the VEX. 
If the VEX is 0, there is no trap */ +static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC, + uint8_t *vec_exc) +{ + uint8_t vece_exc = 0, trap_exc; + unsigned qemu_exc; + + /* Retrieve and clear the softfloat exceptions */ + qemu_exc = env->fpu_status.float_exception_flags; + if (qemu_exc == 0) { + return 0; + } + env->fpu_status.float_exception_flags = 0; + + vece_exc = s390_softfloat_exc_to_ieee(qemu_exc); + + /* Add them to the vector-wide s390x exception bits */ + *vec_exc |= vece_exc; + + /* Check for traps and construct the VXC */ + trap_exc = vece_exc & env->fpc >> 24; + if (trap_exc) { + if (trap_exc & S390_IEEE_MASK_INVALID) { + return enr << 4 | VIC_INVALID; + } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) { + return enr << 4 | VIC_DIVBYZERO; + } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) { + return enr << 4 | VIC_OVERFLOW; + } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) { + return enr << 4 | VIC_UNDERFLOW; + } else if (!XxC) { + g_assert(trap_exc & S390_IEEE_MASK_INEXACT); + /* inexact has lowest priority on traps */ + return enr << 4 | VIC_INEXACT; + } + } + return 0; +} + +static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc, + uintptr_t retaddr) +{ + if (vxc) { + /* on traps, the fpc flags are not updated, instruction is suppressed */ + tcg_s390_vector_exception(env, vxc, retaddr); + } + if (vec_exc) { + /* indicate exceptions for all elements combined */ + env->fpc |= vec_exc << 16; + } +} + +static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr) +{ + return make_float32(s390_vec_read_element32(v, enr)); +} + +static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr) +{ + return make_float64(s390_vec_read_element64(v, enr)); +} + +static float128 s390_vec_read_float128(const S390Vector *v) +{ + return make_float128(s390_vec_read_element64(v, 0), + s390_vec_read_element64(v, 1)); +} + +static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data) +{ + return s390_vec_write_element32(v, enr, data); +} + +static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data) +{ + return s390_vec_write_element64(v, enr, data); +} + +static void s390_vec_write_float128(S390Vector *v, float128 data) +{ + s390_vec_write_element64(v, 0, data.high); + s390_vec_write_element64(v, 1, data.low); +} + +typedef float32 (*vop32_2_fn)(float32 a, float_status *s); +static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, + bool s, bool XxC, uint8_t erm, vop32_2_fn fn, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i, old_mode; + + old_mode = s390_swap_bfp_rounding_mode(env, erm); + for (i = 0; i < 4; i++) { + const float32 a = s390_vec_read_float32(v2, i); + + s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status)); + vxc = check_ieee_exc(env, i, XxC, &vec_exc); + if (s || vxc) { + break; + } + } + s390_restore_bfp_rounding_mode(env, old_mode); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +typedef float64 (*vop64_2_fn)(float64 a, float_status *s); +static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, + bool s, bool XxC, uint8_t erm, vop64_2_fn fn, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i, old_mode; + + old_mode = s390_swap_bfp_rounding_mode(env, erm); + for (i = 0; i < 2; i++) { + const float64 a = s390_vec_read_float64(v2, i); + + s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status)); + vxc = check_ieee_exc(env, i, XxC, &vec_exc); + if (s || vxc) { + 
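+            /* stop after element 0 for single-element ops, or once a trap is pending */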
break; + } + } + s390_restore_bfp_rounding_mode(env, old_mode); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +typedef float128 (*vop128_2_fn)(float128 a, float_status *s); +static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, + bool s, bool XxC, uint8_t erm, vop128_2_fn fn, + uintptr_t retaddr) +{ + const float128 a = s390_vec_read_float128(v2); + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int old_mode; + + old_mode = s390_swap_bfp_rounding_mode(env, erm); + s390_vec_write_float128(&tmp, fn(a, &env->fpu_status)); + vxc = check_ieee_exc(env, 0, XxC, &vec_exc); + s390_restore_bfp_rounding_mode(env, old_mode); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +static float64 vcdg64(float64 a, float_status *s) +{ + return int64_to_float64(a, s); +} + +static float64 vcdlg64(float64 a, float_status *s) +{ + return uint64_to_float64(a, s); +} + +static float64 vcgd64(float64 a, float_status *s) +{ + const float64 tmp = float64_to_int64(a, s); + + return float64_is_any_nan(a) ? INT64_MIN : tmp; +} + +static float64 vclgd64(float64 a, float_status *s) +{ + const float64 tmp = float64_to_uint64(a, s); + + return float64_is_any_nan(a) ? 0 : tmp; +} + +#define DEF_GVEC_VOP2_FN(NAME, FN, BITS) \ +void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \ + uint32_t desc) \ +{ \ + const uint8_t erm = extract32(simd_data(desc), 4, 4); \ + const bool se = extract32(simd_data(desc), 3, 1); \ + const bool XxC = extract32(simd_data(desc), 2, 1); \ + \ + vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \ +} + +#define DEF_GVEC_VOP2_64(NAME) \ +DEF_GVEC_VOP2_FN(NAME, NAME##64, 64) + +#define DEF_GVEC_VOP2(NAME, OP) \ +DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \ +DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \ +DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128) + +DEF_GVEC_VOP2_64(vcdg) +DEF_GVEC_VOP2_64(vcdlg) +DEF_GVEC_VOP2_64(vcgd) +DEF_GVEC_VOP2_64(vclgd) +DEF_GVEC_VOP2(vfi, round_to_int) +DEF_GVEC_VOP2(vfsq, sqrt) + +typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s); +static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vop32_3_fn fn, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 4; i++) { + const float32 a = s390_vec_read_float32(v2, i); + const float32 b = s390_vec_read_float32(v3, i); + + s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status)); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s); +static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vop64_3_fn fn, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 2; i++) { + const float64 a = s390_vec_read_float64(v2, i); + const float64 b = s390_vec_read_float64(v3, i); + + s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status)); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s); +static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vop128_3_fn fn, + uintptr_t retaddr) +{ + const float128 a = s390_vec_read_float128(v2); + const 
float128 b = s390_vec_read_float128(v3); + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + + s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status)); + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +#define DEF_GVEC_VOP3_B(NAME, OP, BITS) \ +void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool se = extract32(simd_data(desc), 3, 1); \ + \ + vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC()); \ +} + +#define DEF_GVEC_VOP3(NAME, OP) \ +DEF_GVEC_VOP3_B(NAME, OP, 32) \ +DEF_GVEC_VOP3_B(NAME, OP, 64) \ +DEF_GVEC_VOP3_B(NAME, OP, 128) + +DEF_GVEC_VOP3(vfa, add) +DEF_GVEC_VOP3(vfs, sub) +DEF_GVEC_VOP3(vfd, div) +DEF_GVEC_VOP3(vfm, mul) + +static int wfc32(const S390Vector *v1, const S390Vector *v2, + CPUS390XState *env, bool signal, uintptr_t retaddr) +{ + /* only the zero-indexed elements are compared */ + const float32 a = s390_vec_read_float32(v1, 0); + const float32 b = s390_vec_read_float32(v2, 0); + uint8_t vxc, vec_exc = 0; + int cmp; + + if (signal) { + cmp = float32_compare(a, b, &env->fpu_status); + } else { + cmp = float32_compare_quiet(a, b, &env->fpu_status); + } + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + + return float_comp_to_cc(env, cmp); +} + +static int wfc64(const S390Vector *v1, const S390Vector *v2, + CPUS390XState *env, bool signal, uintptr_t retaddr) +{ + /* only the zero-indexed elements are compared */ + const float64 a = s390_vec_read_float64(v1, 0); + const float64 b = s390_vec_read_float64(v2, 0); + uint8_t vxc, vec_exc = 0; + int cmp; + + if (signal) { + cmp = float64_compare(a, b, &env->fpu_status); + } else { + cmp = float64_compare_quiet(a, b, &env->fpu_status); + } + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + + return float_comp_to_cc(env, cmp); +} + +static int wfc128(const S390Vector *v1, const S390Vector *v2, + CPUS390XState *env, bool signal, uintptr_t retaddr) +{ + /* only the zero-indexed elements are compared */ + const float128 a = s390_vec_read_float128(v1); + const float128 b = s390_vec_read_float128(v2); + uint8_t vxc, vec_exc = 0; + int cmp; + + if (signal) { + cmp = float128_compare(a, b, &env->fpu_status); + } else { + cmp = float128_compare_quiet(a, b, &env->fpu_status); + } + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + + return float_comp_to_cc(env, cmp); +} + +#define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS) \ +void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC()); \ +} + +#define DEF_GVEC_WFC(NAME, SIGNAL) \ + DEF_GVEC_WFC_B(NAME, SIGNAL, 32) \ + DEF_GVEC_WFC_B(NAME, SIGNAL, 64) \ + DEF_GVEC_WFC_B(NAME, SIGNAL, 128) + +DEF_GVEC_WFC(wfc, false) +DEF_GVEC_WFC(wfk, true) + +typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status); +static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int match = 0; + int i; + + for (i = 0; i < 4; i++) { + const float32 a = s390_vec_read_float32(v2, i); + const float32 b = s390_vec_read_float32(v3, i); + + /* swap the order of the parameters, so we can use existing functions */ + if (fn(b, a, &env->fpu_status)) { + match++; + 
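+            /* matching elements are flagged with all bits set */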
s390_vec_write_element32(&tmp, i, -1u); + } + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; + if (match) { + return s || match == 4 ? 0 : 1; + } + return 3; +} + +typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status); +static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int match = 0; + int i; + + for (i = 0; i < 2; i++) { + const float64 a = s390_vec_read_float64(v2, i); + const float64 b = s390_vec_read_float64(v3, i); + + /* swap the order of the parameters, so we can use existing functions */ + if (fn(b, a, &env->fpu_status)) { + match++; + s390_vec_write_element64(&tmp, i, -1ull); + } + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; + if (match) { + return s || match == 2 ? 0 : 1; + } + return 3; +} + +typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status); +static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr) +{ + const float128 a = s390_vec_read_float128(v2); + const float128 b = s390_vec_read_float128(v3); + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + bool match = false; + + /* swap the order of the parameters, so we can use existing functions */ + if (fn(b, a, &env->fpu_status)) { + match = true; + s390_vec_write_element64(&tmp, 0, -1ull); + s390_vec_write_element64(&tmp, 1, -1ull); + } + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; + return match ? 0 : 3; +} + +#define DEF_GVEC_VFC_B(NAME, OP, BITS) \ +void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool se = extract32(simd_data(desc), 3, 1); \ + const bool sq = extract32(simd_data(desc), 2, 1); \ + vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet; \ + \ + vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ +} \ + \ +void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool se = extract32(simd_data(desc), 3, 1); \ + const bool sq = extract32(simd_data(desc), 2, 1); \ + vfc##BITS##_fn fn = sq ? 
float##BITS##_##OP : float##BITS##_##OP##_quiet; \ + \ + env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC()); \ +} + +#define DEF_GVEC_VFC(NAME, OP) \ +DEF_GVEC_VFC_B(NAME, OP, 32) \ +DEF_GVEC_VFC_B(NAME, OP, 64) \ +DEF_GVEC_VFC_B(NAME, OP, 128) \ + +DEF_GVEC_VFC(vfce, eq) +DEF_GVEC_VFC(vfch, lt) +DEF_GVEC_VFC(vfche, le) + +void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const bool s = extract32(simd_data(desc), 3, 1); + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 2; i++) { + /* load from even element */ + const float32 a = s390_vec_read_element32(v2, i * 2); + const uint64_t ret = float32_to_float64(a, &env->fpu_status); + + s390_vec_write_element64(&tmp, i, ret); + /* indicate the source element */ + vxc = check_ieee_exc(env, i * 2, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, GETPC()); + *(S390Vector *)v1 = tmp; +} + +void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + /* load from even element */ + const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0), + &env->fpu_status); + uint8_t vxc, vec_exc = 0; + + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, GETPC()); + s390_vec_write_float128(v1, ret); +} + +void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool s = extract32(simd_data(desc), 3, 1); + const bool XxC = extract32(simd_data(desc), 2, 1); + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i, old_mode; + + old_mode = s390_swap_bfp_rounding_mode(env, erm); + for (i = 0; i < 2; i++) { + float64 a = s390_vec_read_element64(v2, i); + uint32_t ret = float64_to_float32(a, &env->fpu_status); + + /* place at even element */ + s390_vec_write_element32(&tmp, i * 2, ret); + /* indicate the source element */ + vxc = check_ieee_exc(env, i, XxC, &vec_exc); + if (s || vxc) { + break; + } + } + s390_restore_bfp_rounding_mode(env, old_mode); + handle_ieee_exc(env, vxc, vec_exc, GETPC()); + *(S390Vector *)v1 = tmp; +} + +void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + uint8_t vxc, vec_exc = 0; + int old_mode; + float64 ret; + + old_mode = s390_swap_bfp_rounding_mode(env, erm); + ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status); + vxc = check_ieee_exc(env, 0, XxC, &vec_exc); + s390_restore_bfp_rounding_mode(env, old_mode); + handle_ieee_exc(env, vxc, vec_exc, GETPC()); + + /* place at even element, odd element is unpredictable */ + s390_vec_write_float64(v1, 0, ret); +} + +static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + const S390Vector *v4, CPUS390XState *env, bool s, int flags, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 4; i++) { + const float32 a = s390_vec_read_float32(v2, i); + const float32 b = s390_vec_read_float32(v3, i); + const float32 c = s390_vec_read_float32(v4, i); + float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status); + + s390_vec_write_float32(&tmp, i, ret); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector 
*v3, + const S390Vector *v4, CPUS390XState *env, bool s, int flags, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 2; i++) { + const float64 a = s390_vec_read_float64(v2, i); + const float64 b = s390_vec_read_float64(v3, i); + const float64 c = s390_vec_read_float64(v4, i); + const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status); + + s390_vec_write_float64(&tmp, i, ret); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + const S390Vector *v4, CPUS390XState *env, bool s, int flags, + uintptr_t retaddr) +{ + const float128 a = s390_vec_read_float128(v2); + const float128 b = s390_vec_read_float128(v3); + const float128 c = s390_vec_read_float128(v4); + uint8_t vxc, vec_exc = 0; + float128 ret; + + ret = float128_muladd(a, b, c, flags, &env->fpu_status); + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + s390_vec_write_float128(v1, ret); +} + +#define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS) \ +void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, CPUS390XState *env, \ + uint32_t desc) \ +{ \ + const bool se = extract32(simd_data(desc), 3, 1); \ + \ + vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC()); \ +} + +#define DEF_GVEC_VFMA(NAME, FLAGS) \ + DEF_GVEC_VFMA_B(NAME, FLAGS, 32) \ + DEF_GVEC_VFMA_B(NAME, FLAGS, 64) \ + DEF_GVEC_VFMA_B(NAME, FLAGS, 128) + +DEF_GVEC_VFMA(vfma, 0) +DEF_GVEC_VFMA(vfms, float_muladd_negate_c) +DEF_GVEC_VFMA(vfnma, float_muladd_negate_result) +DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result) + +void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + uint16_t i3 = extract32(simd_data(desc), 4, 12); + bool s = extract32(simd_data(desc), 3, 1); + int i, match = 0; + + for (i = 0; i < 4; i++) { + float32 a = s390_vec_read_float32(v2, i); + + if (float32_dcmask(env, a) & i3) { + match++; + s390_vec_write_element32(v1, i, -1u); + } else { + s390_vec_write_element32(v1, i, 0); + } + if (s) { + break; + } + } + + if (match == 4 || (s && match)) { + env->cc_op = 0; + } else if (match) { + env->cc_op = 1; + } else { + env->cc_op = 3; + } +} + +void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint16_t i3 = extract32(simd_data(desc), 4, 12); + const bool s = extract32(simd_data(desc), 3, 1); + int i, match = 0; + + for (i = 0; i < 2; i++) { + const float64 a = s390_vec_read_float64(v2, i); + + if (float64_dcmask(env, a) & i3) { + match++; + s390_vec_write_element64(v1, i, -1ull); + } else { + s390_vec_write_element64(v1, i, 0); + } + if (s) { + break; + } + } + + if (match == 2 || (s && match)) { + env->cc_op = 0; + } else if (match) { + env->cc_op = 1; + } else { + env->cc_op = 3; + } +} + +void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const float128 a = s390_vec_read_float128(v2); + uint16_t i3 = extract32(simd_data(desc), 4, 12); + + if (float128_dcmask(env, a) & i3) { + env->cc_op = 0; + s390_vec_write_element64(v1, 0, -1ull); + s390_vec_write_element64(v1, 1, -1ull); + } else { + env->cc_op = 3; + s390_vec_write_element64(v1, 0, 0); + s390_vec_write_element64(v1, 1, 0); + } +} + +typedef enum S390MinMaxType { + S390_MINMAX_TYPE_IEEE = 0, + S390_MINMAX_TYPE_JAVA, + 
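+    /* C-macro, C++ std::min/max and fmin/fmax semantics follow (VFMIN/VFMAX M6 modes) */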
S390_MINMAX_TYPE_C_MACRO, + S390_MINMAX_TYPE_CPP, + S390_MINMAX_TYPE_F, +} S390MinMaxType; + +typedef enum S390MinMaxRes { + S390_MINMAX_RES_MINMAX = 0, + S390_MINMAX_RES_A, + S390_MINMAX_RES_B, + S390_MINMAX_RES_SILENCE_A, + S390_MINMAX_RES_SILENCE_B, +} S390MinMaxRes; + +static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b, + S390MinMaxType type, float_status *s) +{ + const bool neg_a = dcmask_a & DCMASK_NEGATIVE; + const bool nan_a = dcmask_a & DCMASK_NAN; + const bool nan_b = dcmask_b & DCMASK_NAN; + + g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); + + if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { + const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; + const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; + + if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { + s->float_exception_flags |= float_flag_invalid; + } + switch (type) { + case S390_MINMAX_TYPE_JAVA: + if (sig_a) { + return S390_MINMAX_RES_SILENCE_A; + } else if (sig_b) { + return S390_MINMAX_RES_SILENCE_B; + } + return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_F: + return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_C_MACRO: + s->float_exception_flags |= float_flag_invalid; + return S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_CPP: + s->float_exception_flags |= float_flag_invalid; + return S390_MINMAX_RES_A; + default: + g_assert_not_reached(); + } + } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) { + switch (type) { + case S390_MINMAX_TYPE_JAVA: + return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_C_MACRO: + return S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_F: + return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A; + case S390_MINMAX_TYPE_CPP: + return S390_MINMAX_RES_A; + default: + g_assert_not_reached(); + } + } + return S390_MINMAX_RES_MINMAX; +} + +static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b, + S390MinMaxType type, float_status *s) +{ + g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F); + + if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) { + const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN; + const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN; + const bool nan_a = dcmask_a & DCMASK_NAN; + const bool nan_b = dcmask_b & DCMASK_NAN; + + if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) { + s->float_exception_flags |= float_flag_invalid; + } + switch (type) { + case S390_MINMAX_TYPE_JAVA: + if (sig_a) { + return S390_MINMAX_RES_SILENCE_A; + } else if (sig_b) { + return S390_MINMAX_RES_SILENCE_B; + } + return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_F: + return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_C_MACRO: + s->float_exception_flags |= float_flag_invalid; + return S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_CPP: + s->float_exception_flags |= float_flag_invalid; + return S390_MINMAX_RES_A; + default: + g_assert_not_reached(); + } + } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) { + const bool neg_a = dcmask_a & DCMASK_NEGATIVE; + + switch (type) { + case S390_MINMAX_TYPE_JAVA: + case S390_MINMAX_TYPE_F: + return neg_a ? 
S390_MINMAX_RES_B : S390_MINMAX_RES_A; + case S390_MINMAX_TYPE_C_MACRO: + return S390_MINMAX_RES_B; + case S390_MINMAX_TYPE_CPP: + return S390_MINMAX_RES_A; + default: + g_assert_not_reached(); + } + } + return S390_MINMAX_RES_MINMAX; +} + +static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b, + S390MinMaxType type, bool is_min, + float_status *s) +{ + return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) : + vfmax_res(dcmask_a, dcmask_b, type, s); +} + +static void vfminmax32(S390Vector *v1, const S390Vector *v2, + const S390Vector *v3, CPUS390XState *env, + S390MinMaxType type, bool is_min, bool is_abs, bool se, + uintptr_t retaddr) +{ + float_status *s = &env->fpu_status; + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 4; i++) { + float32 a = s390_vec_read_float32(v2, i); + float32 b = s390_vec_read_float32(v3, i); + float32 result; + + if (type != S390_MINMAX_TYPE_IEEE) { + S390MinMaxRes res; + + if (is_abs) { + a = float32_abs(a); + b = float32_abs(b); + } + + res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b), + type, is_min, s); + switch (res) { + case S390_MINMAX_RES_MINMAX: + result = is_min ? float32_min(a, b, s) : float32_max(a, b, s); + break; + case S390_MINMAX_RES_A: + result = a; + break; + case S390_MINMAX_RES_B: + result = b; + break; + case S390_MINMAX_RES_SILENCE_A: + result = float32_silence_nan(a, s); + break; + case S390_MINMAX_RES_SILENCE_B: + result = float32_silence_nan(b, s); + break; + default: + g_assert_not_reached(); + } + } else if (!is_abs) { + result = is_min ? float32_minnum(a, b, &env->fpu_status) : + float32_maxnum(a, b, &env->fpu_status); + } else { + result = is_min ? float32_minnummag(a, b, &env->fpu_status) : + float32_maxnummag(a, b, &env->fpu_status); + } + + s390_vec_write_float32(&tmp, i, result); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (se || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +static void vfminmax64(S390Vector *v1, const S390Vector *v2, + const S390Vector *v3, CPUS390XState *env, + S390MinMaxType type, bool is_min, bool is_abs, bool se, + uintptr_t retaddr) +{ + float_status *s = &env->fpu_status; + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 2; i++) { + float64 a = s390_vec_read_float64(v2, i); + float64 b = s390_vec_read_float64(v3, i); + float64 result; + + if (type != S390_MINMAX_TYPE_IEEE) { + S390MinMaxRes res; + + if (is_abs) { + a = float64_abs(a); + b = float64_abs(b); + } + + res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b), + type, is_min, s); + switch (res) { + case S390_MINMAX_RES_MINMAX: + result = is_min ? float64_min(a, b, s) : float64_max(a, b, s); + break; + case S390_MINMAX_RES_A: + result = a; + break; + case S390_MINMAX_RES_B: + result = b; + break; + case S390_MINMAX_RES_SILENCE_A: + result = float64_silence_nan(a, s); + break; + case S390_MINMAX_RES_SILENCE_B: + result = float64_silence_nan(b, s); + break; + default: + g_assert_not_reached(); + } + } else if (!is_abs) { + result = is_min ? float64_minnum(a, b, &env->fpu_status) : + float64_maxnum(a, b, &env->fpu_status); + } else { + result = is_min ? 
float64_minnummag(a, b, &env->fpu_status) : + float64_maxnummag(a, b, &env->fpu_status); + } + + s390_vec_write_float64(&tmp, i, result); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (se || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +static void vfminmax128(S390Vector *v1, const S390Vector *v2, + const S390Vector *v3, CPUS390XState *env, + S390MinMaxType type, bool is_min, bool is_abs, bool se, + uintptr_t retaddr) +{ + float128 a = s390_vec_read_float128(v2); + float128 b = s390_vec_read_float128(v3); + float_status *s = &env->fpu_status; + uint8_t vxc, vec_exc = 0; + float128 result; + + if (type != S390_MINMAX_TYPE_IEEE) { + S390MinMaxRes res; + + if (is_abs) { + a = float128_abs(a); + b = float128_abs(b); + } + + res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b), + type, is_min, s); + switch (res) { + case S390_MINMAX_RES_MINMAX: + result = is_min ? float128_min(a, b, s) : float128_max(a, b, s); + break; + case S390_MINMAX_RES_A: + result = a; + break; + case S390_MINMAX_RES_B: + result = b; + break; + case S390_MINMAX_RES_SILENCE_A: + result = float128_silence_nan(a, s); + break; + case S390_MINMAX_RES_SILENCE_B: + result = float128_silence_nan(b, s); + break; + default: + g_assert_not_reached(); + } + } else if (!is_abs) { + result = is_min ? float128_minnum(a, b, &env->fpu_status) : + float128_maxnum(a, b, &env->fpu_status); + } else { + result = is_min ? float128_minnummag(a, b, &env->fpu_status) : + float128_maxnummag(a, b, &env->fpu_status); + } + + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + s390_vec_write_float128(v1, result); +} + +#define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS) \ +void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool se = extract32(simd_data(desc), 3, 1); \ + uint8_t type = extract32(simd_data(desc), 4, 4); \ + bool is_abs = false; \ + \ + if (type >= 8) { \ + is_abs = true; \ + type -= 8; \ + } \ + \ + vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC()); \ +} + +#define DEF_GVEC_VFMINMAX(NAME, IS_MIN) \ + DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32) \ + DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64) \ + DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128) + +DEF_GVEC_VFMINMAX(vfmax, false) +DEF_GVEC_VFMINMAX(vfmin, true) diff --git a/target/s390x/tcg/vec_helper.c b/target/s390x/tcg/vec_helper.c new file mode 100644 index 0000000000..ededf13cf0 --- /dev/null +++ b/target/s390x/tcg/vec_helper.c @@ -0,0 +1,214 @@ +/* + * QEMU TCG support -- s390x vector support instructions + * + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "vec.h" +#include "tcg/tcg.h" +#include "tcg/tcg-gvec-desc.h" +#include "exec/helper-proto.h" +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" + +void HELPER(gvec_vbperm)(void *v1, const void *v2, const void *v3, + uint32_t desc) +{ + S390Vector tmp = {}; + uint16_t result = 0; + int i; + + for (i = 0; i < 16; i++) { + const uint8_t bit_nr = s390_vec_read_element8(v3, i); + uint16_t bit; + + if (bit_nr >= 128) { + continue; + } + bit = (s390_vec_read_element8(v2, bit_nr / 8) + >> (7 - (bit_nr % 8))) & 1; + result |= (bit << (15 - i)); + } + s390_vec_write_element16(&tmp, 3, result); + *(S390Vector *)v1 = tmp; +} + +void HELPER(vll)(CPUS390XState *env, void *v1, uint64_t addr, uint64_t bytes) +{ + if (likely(bytes >= 16)) { + uint64_t t0, t1; + + t0 = cpu_ldq_data_ra(env, addr, GETPC()); + addr = wrap_address(env, addr + 8); + t1 = cpu_ldq_data_ra(env, addr, GETPC()); + s390_vec_write_element64(v1, 0, t0); + s390_vec_write_element64(v1, 1, t1); + } else { + S390Vector tmp = {}; + int i; + + for (i = 0; i < bytes; i++) { + uint8_t byte = cpu_ldub_data_ra(env, addr, GETPC()); + + s390_vec_write_element8(&tmp, i, byte); + addr = wrap_address(env, addr + 1); + } + *(S390Vector *)v1 = tmp; + } +} + +#define DEF_VPK_HFN(BITS, TBITS) \ +typedef uint##TBITS##_t (*vpk##BITS##_fn)(uint##BITS##_t, int *); \ +static int vpk##BITS##_hfn(S390Vector *v1, const S390Vector *v2, \ + const S390Vector *v3, vpk##BITS##_fn fn) \ +{ \ + int i, saturated = 0; \ + S390Vector tmp; \ + \ + for (i = 0; i < (128 / TBITS); i++) { \ + uint##BITS##_t src; \ + \ + if (i < (128 / BITS)) { \ + src = s390_vec_read_element##BITS(v2, i); \ + } else { \ + src = s390_vec_read_element##BITS(v3, i - (128 / BITS)); \ + } \ + s390_vec_write_element##TBITS(&tmp, i, fn(src, &saturated)); \ + } \ + *v1 = tmp; \ + return saturated; \ +} +DEF_VPK_HFN(64, 32) +DEF_VPK_HFN(32, 16) +DEF_VPK_HFN(16, 8) + +#define DEF_VPK(BITS, TBITS) \ +static uint##TBITS##_t vpk##BITS##e(uint##BITS##_t src, int *saturated) \ +{ \ + return src; \ +} \ +void HELPER(gvec_vpk##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + vpk##BITS##_hfn(v1, v2, v3, vpk##BITS##e); \ +} +DEF_VPK(64, 32) +DEF_VPK(32, 16) +DEF_VPK(16, 8) + +#define DEF_VPKS(BITS, TBITS) \ +static uint##TBITS##_t vpks##BITS##e(uint##BITS##_t src, int *saturated) \ +{ \ + if ((int##BITS##_t)src > INT##TBITS##_MAX) { \ + (*saturated)++; \ + return INT##TBITS##_MAX; \ + } else if ((int##BITS##_t)src < INT##TBITS##_MIN) { \ + (*saturated)++; \ + return INT##TBITS##_MIN; \ + } \ + return src; \ +} \ +void HELPER(gvec_vpks##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e); \ +} \ +void HELPER(gvec_vpks_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + int saturated = vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e); \ + \ + if (saturated == (128 / TBITS)) { \ + env->cc_op = 3; \ + } else if (saturated) { \ + env->cc_op = 1; \ + } else { \ + env->cc_op = 0; \ + } \ +} +DEF_VPKS(64, 32) +DEF_VPKS(32, 16) +DEF_VPKS(16, 8) + +#define DEF_VPKLS(BITS, TBITS) \ +static uint##TBITS##_t vpkls##BITS##e(uint##BITS##_t src, int *saturated) \ +{ \ + if (src > UINT##TBITS##_MAX) { \ + (*saturated)++; \ + return UINT##TBITS##_MAX; \ + } \ + return src; \ +} \ +void HELPER(gvec_vpkls##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + vpk##BITS##_hfn(v1, v2, v3, 
vpkls##BITS##e); \
+} \
+void HELPER(gvec_vpkls_cc##BITS)(void *v1, const void *v2, const void *v3, \
+                                 CPUS390XState *env, uint32_t desc) \
+{ \
+    int saturated = vpk##BITS##_hfn(v1, v2, v3, vpkls##BITS##e); \
+ \
+    if (saturated == (128 / TBITS)) { \
+        env->cc_op = 3; \
+    } else if (saturated) { \
+        env->cc_op = 1; \
+    } else { \
+        env->cc_op = 0; \
+    } \
+}
+DEF_VPKLS(64, 32)
+DEF_VPKLS(32, 16)
+DEF_VPKLS(16, 8)
+
+void HELPER(gvec_vperm)(void *v1, const void *v2, const void *v3,
+                        const void *v4, uint32_t desc)
+{
+    S390Vector tmp;
+    int i;
+
+    for (i = 0; i < 16; i++) {
+        const uint8_t selector = s390_vec_read_element8(v4, i) & 0x1f;
+        uint8_t byte;
+
+        if (selector < 16) {
+            byte = s390_vec_read_element8(v2, selector);
+        } else {
+            byte = s390_vec_read_element8(v3, selector - 16);
+        }
+        s390_vec_write_element8(&tmp, i, byte);
+    }
+    *(S390Vector *)v1 = tmp;
+}
+
+void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr,
+                  uint64_t bytes)
+{
+    /* Probe write access before actually modifying memory */
+    probe_write_access(env, addr, bytes, GETPC());
+
+    if (likely(bytes >= 16)) {
+        cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 0), GETPC());
+        addr = wrap_address(env, addr + 8);
+        cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 1), GETPC());
+    } else {
+        int i;
+
+        for (i = 0; i < bytes; i++) {
+            uint8_t byte = s390_vec_read_element8(v1, i);
+
+            cpu_stb_data_ra(env, addr, byte, GETPC());
+            addr = wrap_address(env, addr + 1);
+        }
+    }
+}
diff --git a/target/s390x/tcg/vec_int_helper.c b/target/s390x/tcg/vec_int_helper.c
new file mode 100644
index 0000000000..5561b3ed90
--- /dev/null
+++ b/target/s390x/tcg/vec_int_helper.c
@@ -0,0 +1,587 @@
+/*
+ * QEMU TCG support -- s390x vector integer instruction support
+ *
+ * Copyright (C) 2019 Red Hat Inc
+ *
+ * Authors:
+ *  David Hildenbrand <david@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "vec.h" +#include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" + +static bool s390_vec_is_zero(const S390Vector *v) +{ + return !v->doubleword[0] && !v->doubleword[1]; +} + +static void s390_vec_xor(S390Vector *res, const S390Vector *a, + const S390Vector *b) +{ + res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0]; + res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1]; +} + +static void s390_vec_and(S390Vector *res, const S390Vector *a, + const S390Vector *b) +{ + res->doubleword[0] = a->doubleword[0] & b->doubleword[0]; + res->doubleword[1] = a->doubleword[1] & b->doubleword[1]; +} + +static bool s390_vec_equal(const S390Vector *a, const S390Vector *b) +{ + return a->doubleword[0] == b->doubleword[0] && + a->doubleword[1] == b->doubleword[1]; +} + +static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count) +{ + uint64_t tmp; + + g_assert(count < 128); + if (count == 0) { + d->doubleword[0] = a->doubleword[0]; + d->doubleword[1] = a->doubleword[1]; + } else if (count == 64) { + d->doubleword[0] = a->doubleword[1]; + d->doubleword[1] = 0; + } else if (count < 64) { + tmp = extract64(a->doubleword[1], 64 - count, count); + d->doubleword[1] = a->doubleword[1] << count; + d->doubleword[0] = (a->doubleword[0] << count) | tmp; + } else { + d->doubleword[0] = a->doubleword[1] << (count - 64); + d->doubleword[1] = 0; + } +} + +static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count) +{ + uint64_t tmp; + + if (count == 0) { + d->doubleword[0] = a->doubleword[0]; + d->doubleword[1] = a->doubleword[1]; + } else if (count == 64) { + tmp = (int64_t)a->doubleword[0] >> 63; + d->doubleword[1] = a->doubleword[0]; + d->doubleword[0] = tmp; + } else if (count < 64) { + tmp = a->doubleword[1] >> count; + d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); + d->doubleword[0] = (int64_t)a->doubleword[0] >> count; + } else { + tmp = (int64_t)a->doubleword[0] >> 63; + d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64); + d->doubleword[0] = tmp; + } +} + +static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count) +{ + uint64_t tmp; + + g_assert(count < 128); + if (count == 0) { + d->doubleword[0] = a->doubleword[0]; + d->doubleword[1] = a->doubleword[1]; + } else if (count == 64) { + d->doubleword[1] = a->doubleword[0]; + d->doubleword[0] = 0; + } else if (count < 64) { + tmp = a->doubleword[1] >> count; + d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]); + d->doubleword[0] = a->doubleword[0] >> count; + } else { + d->doubleword[1] = a->doubleword[0] >> (count - 64); + d->doubleword[0] = 0; + } +} +#define DEF_VAVG(BITS) \ +void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ + const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ + \ + s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ + } \ +} +DEF_VAVG(8) +DEF_VAVG(16) + +#define DEF_VAVGL(BITS) \ +void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ + \ + s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1); \ + } \ +} 
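+/*
+ * Both macros round the exact average up: (a + b + 1) >> 1 is evaluated
+ * after integer promotion, so the sum cannot overflow for the 8- and
+ * 16-bit element sizes instantiated here.  A rough scalar equivalent for
+ * one unsigned byte element (hypothetical sketch, not one of the helpers):
+ *
+ *     uint8_t vavgl8(uint8_t a, uint8_t b)
+ *     {
+ *         return ((uint32_t)a + b + 1) >> 1;  // vavgl8(254, 255) == 255
+ *     }
+ */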
+DEF_VAVGL(8)
+DEF_VAVGL(16)
+
+#define DEF_VCLZ(BITS) \
+void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc) \
+{ \
+    int i; \
+ \
+    for (i = 0; i < (128 / BITS); i++) { \
+        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
+ \
+        /* a is zero-extended to 32 bits, compensate for the excess width */ \
+        s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS); \
+    } \
+}
+DEF_VCLZ(8)
+DEF_VCLZ(16)
+
+#define DEF_VCTZ(BITS) \
+void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc) \
+{ \
+    int i; \
+ \
+    for (i = 0; i < (128 / BITS); i++) { \
+        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \
+ \
+        s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS); \
+    } \
+}
+DEF_VCTZ(8)
+DEF_VCTZ(16)
+
+/* like binary multiplication, but XOR instead of addition */
+#define DEF_GALOIS_MULTIPLY(BITS, TBITS) \
+static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a, \
+                                             uint##TBITS##_t b) \
+{ \
+    uint##TBITS##_t res = 0; \
+ \
+    while (b) { \
+        if (b & 0x1) { \
+            res = res ^ a; \
+        } \
+        a = a << 1; \
+        b = b >> 1; \
+    } \
+    return res; \
+}
+DEF_GALOIS_MULTIPLY(8, 16)
+DEF_GALOIS_MULTIPLY(16, 32)
+DEF_GALOIS_MULTIPLY(32, 64)
+
+static S390Vector galois_multiply64(uint64_t a, uint64_t b)
+{
+    S390Vector res = {};
+    S390Vector va = {
+        .doubleword[1] = a,
+    };
+    S390Vector vb = {
+        .doubleword[1] = b,
+    };
+
+    while (!s390_vec_is_zero(&vb)) {
+        if (vb.doubleword[1] & 0x1) {
+            s390_vec_xor(&res, &res, &va);
+        }
+        s390_vec_shl(&va, &va, 1);
+        s390_vec_shr(&vb, &vb, 1);
+    }
+    return res;
+}
+
+#define DEF_VGFM(BITS, TBITS) \
+void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3, \
+                             uint32_t desc) \
+{ \
+    int i; \
+ \
+    for (i = 0; i < (128 / TBITS); i++) { \
+        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
+        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
+        uint##TBITS##_t d = galois_multiply##BITS(a, b); \
+ \
+        a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
+        b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
+        d = d ^ galois_multiply##BITS(a, b); \
+        s390_vec_write_element##TBITS(v1, i, d); \
+    } \
+}
+DEF_VGFM(8, 16)
+DEF_VGFM(16, 32)
+DEF_VGFM(32, 64)
+
+void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
+                         uint32_t desc)
+{
+    S390Vector tmp1, tmp2;
+    uint64_t a, b;
+
+    a = s390_vec_read_element64(v2, 0);
+    b = s390_vec_read_element64(v3, 0);
+    tmp1 = galois_multiply64(a, b);
+    a = s390_vec_read_element64(v2, 1);
+    b = s390_vec_read_element64(v3, 1);
+    tmp2 = galois_multiply64(a, b);
+    s390_vec_xor(v1, &tmp1, &tmp2);
+}
+
+#define DEF_VGFMA(BITS, TBITS) \
+void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3, \
+                              const void *v4, uint32_t desc) \
+{ \
+    int i; \
+ \
+    for (i = 0; i < (128 / TBITS); i++) { \
+        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2); \
+        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2); \
+        uint##TBITS##_t d = galois_multiply##BITS(a, b); \
+ \
+        a = s390_vec_read_element##BITS(v2, i * 2 + 1); \
+        b = s390_vec_read_element##BITS(v3, i * 2 + 1); \
+        d = d ^ galois_multiply##BITS(a, b); \
+        d = d ^ s390_vec_read_element##TBITS(v4, i); \
+        s390_vec_write_element##TBITS(v1, i, d); \
+    } \
+}
+DEF_VGFMA(8, 16)
+DEF_VGFMA(16, 32)
+DEF_VGFMA(32, 64)
+
+void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
+                          const void *v4, uint32_t desc)
+{
+    S390Vector tmp1, tmp2;
+    uint64_t a, b;
+
+    a = s390_vec_read_element64(v2, 0);
+    b = s390_vec_read_element64(v3, 0);
+    tmp1 = galois_multiply64(a, b);
+    a = s390_vec_read_element64(v2, 1);
+    b = s390_vec_read_element64(v3, 1);
+    tmp2 =
galois_multiply64(a, b); + s390_vec_xor(&tmp1, &tmp1, &tmp2); + s390_vec_xor(v1, &tmp1, v4); +} + +#define DEF_VMAL(BITS) \ +void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ + const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ + \ + s390_vec_write_element##BITS(v1, i, a * b + c); \ + } \ +} +DEF_VMAL(8) +DEF_VMAL(16) + +#define DEF_VMAH(BITS) \ +void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ + const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ + const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i); \ + \ + s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ + } \ +} +DEF_VMAH(8) +DEF_VMAH(16) + +#define DEF_VMALH(BITS) \ +void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ + const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i); \ + \ + s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS); \ + } \ +} +DEF_VMALH(8) +DEF_VMALH(16) + +#define DEF_VMAE(BITS, TBITS) \ +void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ + int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ + int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ + int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b + c); \ + } \ +} +DEF_VMAE(8, 16) +DEF_VMAE(16, 32) +DEF_VMAE(32, 64) + +#define DEF_VMALE(BITS, TBITS) \ +void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ + uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ + uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ + uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b + c); \ + } \ +} +DEF_VMALE(8, 16) +DEF_VMALE(16, 32) +DEF_VMALE(32, 64) + +#define DEF_VMAO(BITS, TBITS) \ +void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ + int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ + int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ + int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b + c); \ + } \ +} +DEF_VMAO(8, 16) +DEF_VMAO(16, 32) +DEF_VMAO(32, 64) + +#define DEF_VMALO(BITS, TBITS) \ +void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ + uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ + uint##TBITS##_t b = 
s390_vec_read_element##BITS(v3, j); \ + uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b + c); \ + } \ +} +DEF_VMALO(8, 16) +DEF_VMALO(16, 32) +DEF_VMALO(32, 64) + +#define DEF_VMH(BITS) \ +void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i); \ + const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i); \ + \ + s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ + } \ +} +DEF_VMH(8) +DEF_VMH(16) + +#define DEF_VMLH(BITS) \ +void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ + \ + s390_vec_write_element##BITS(v1, i, (a * b) >> BITS); \ + } \ +} +DEF_VMLH(8) +DEF_VMLH(16) + +#define DEF_VME(BITS, TBITS) \ +void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ + int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ + int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b); \ + } \ +} +DEF_VME(8, 16) +DEF_VME(16, 32) +DEF_VME(32, 64) + +#define DEF_VMLE(BITS, TBITS) \ +void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \ + const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ + const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b); \ + } \ +} +DEF_VMLE(8, 16) +DEF_VMLE(16, 32) +DEF_VMLE(32, 64) + +#define DEF_VMO(BITS, TBITS) \ +void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ + int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \ + int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b); \ + } \ +} +DEF_VMO(8, 16) +DEF_VMO(16, 32) +DEF_VMO(32, 64) + +#define DEF_VMLO(BITS, TBITS) \ +void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i, j; \ + \ + for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \ + const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \ + const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \ + \ + s390_vec_write_element##TBITS(v1, i, a * b); \ + } \ +} +DEF_VMLO(8, 16) +DEF_VMLO(16, 32) +DEF_VMLO(32, 64) + +#define DEF_VPOPCT(BITS) \ +void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ + \ + s390_vec_write_element##BITS(v1, i, ctpop32(a)); \ + } \ +} +DEF_VPOPCT(8) +DEF_VPOPCT(16) + +#define DEF_VERIM(BITS) \ +void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const uint8_t count = simd_data(desc); \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i); \ + const uint##BITS##_t b = 
s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i); \ + const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask); \ + \ + s390_vec_write_element##BITS(v1, i, d); \ + } \ +} +DEF_VERIM(8) +DEF_VERIM(16) + +void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, + uint32_t desc) +{ + s390_vec_shl(v1, v2, count); +} + +void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, + uint32_t desc) +{ + s390_vec_sar(v1, v2, count); +} + +void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, + uint32_t desc) +{ + s390_vec_shr(v1, v2, count); +} + +#define DEF_VSCBI(BITS) \ +void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + int i; \ + \ + for (i = 0; i < (128 / BITS); i++) { \ + const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i); \ + const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i); \ + \ + s390_vec_write_element##BITS(v1, i, a >= b); \ + } \ +} +DEF_VSCBI(8) +DEF_VSCBI(16) + +void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + S390Vector tmp; + + s390_vec_and(&tmp, v1, v2); + if (s390_vec_is_zero(&tmp)) { + /* Selected bits all zeros; or all mask bits zero */ + env->cc_op = 0; + } else if (s390_vec_equal(&tmp, v2)) { + /* Selected bits all ones */ + env->cc_op = 3; + } else { + /* Selected bits a mix of zeros and ones */ + env->cc_op = 1; + } +} diff --git a/target/s390x/tcg/vec_string_helper.c b/target/s390x/tcg/vec_string_helper.c new file mode 100644 index 0000000000..ac315eb095 --- /dev/null +++ b/target/s390x/tcg/vec_string_helper.c @@ -0,0 +1,473 @@ +/* + * QEMU TCG support -- s390x vector string instruction support + * + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "s390x-internal.h" +#include "vec.h" +#include "tcg/tcg.h" +#include "tcg/tcg-gvec-desc.h" +#include "exec/helper-proto.h" + +/* + * Returns a bit set in the MSB of each element that is zero, + * as defined by the mask. + */ +static inline uint64_t zero_search(uint64_t a, uint64_t mask) +{ + return ~(((a & mask) + mask) | a | mask); +} + +/* + * Returns a bit set in the MSB of each element that is not zero, + * as defined by the mask. + */ +static inline uint64_t nonzero_search(uint64_t a, uint64_t mask) +{ + return (((a & mask) + mask) | a) & ~mask; +} + +/* + * Returns the byte offset for the first match, or 16 for no match. + */ +static inline int match_index(uint64_t c0, uint64_t c1) +{ + return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3; +} + +/* + * Returns the number of bits composing one element. + */ +static uint8_t get_element_bits(uint8_t es) +{ + return (1 << es) * BITS_PER_BYTE; +} + +/* + * Returns the bitmask for a single element. + */ +static uint64_t get_single_element_mask(uint8_t es) +{ + return -1ull >> (64 - get_element_bits(es)); +} + +/* + * Returns the bitmask for a single element (excluding the MSB). + */ +static uint64_t get_single_element_lsbs_mask(uint8_t es) +{ + return -1ull >> (65 - get_element_bits(es)); +} + +/* + * Returns the bitmasks for multiple elements (excluding the MSBs). 
+ */ +static uint64_t get_element_lsbs_mask(uint8_t es) +{ + return dup_const(es, get_single_element_lsbs_mask(es)); +} + +static int vfae(void *v1, const void *v2, const void *v3, bool in, + bool rt, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + const int bits = get_element_bits(es); + uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_equal; + int i; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = 0; + e1 = 0; + /* compare against equality with every other element */ + for (i = 0; i < 64; i += bits) { + t0 = rol64(b0, i); + t1 = rol64(b1, i); + e0 |= zero_search(a0 ^ t0, mask); + e0 |= zero_search(a0 ^ t1, mask); + e1 |= zero_search(a1 ^ t0, mask); + e1 |= zero_search(a1 ^ t1, mask); + } + /* invert the result if requested - invert only the MSBs */ + if (in) { + e0 = ~e0 & ~mask; + e1 = ~e1 & ~mask; + } + first_equal = match_index(e0, e1); + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + if (rt) { + e0 = (e0 >> (bits - 1)) * get_single_element_mask(es); + e1 = (e1 >> (bits - 1)) * get_single_element_mask(es); + s390_vec_write_element64(v1, 0, e0); + s390_vec_write_element64(v1, 1, e1); + } else { + s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + } + + if (first_zero == 16 && first_equal == 16) { + return 3; /* no match */ + } else if (first_zero == 16) { + return 1; /* matching elements, no match for zero */ + } else if (first_equal < first_zero) { + return 2; /* matching elements before match for zero */ + } + return 0; /* match for zero */ +} + +#define DEF_VFAE_HELPER(BITS) \ +void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool rt = extract32(simd_data(desc), 2, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ +} +DEF_VFAE_HELPER(8) +DEF_VFAE_HELPER(16) +DEF_VFAE_HELPER(32) + +#define DEF_VFAE_CC_HELPER(BITS) \ +void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool rt = extract32(simd_data(desc), 2, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ +} +DEF_VFAE_CC_HELPER(8) +DEF_VFAE_CC_HELPER(16) +DEF_VFAE_CC_HELPER(32) + +static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0, a1, b0, b1, e0, e1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_equal; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = zero_search(a0 ^ b0, mask); + e1 = zero_search(a1 ^ b1, mask); + first_equal = match_index(e0, e1); + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + if (first_zero == 16 && first_equal == 16) { + return 3; /* no match */ + } else if (first_zero == 16) { + return 1; /* matching elements, no match for zero */ + } else if (first_equal 
< first_zero) { + return 2; /* matching elements before match for zero */ + } + return 0; /* match for zero */ +} + +#define DEF_VFEE_HELPER(BITS) \ +void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfee(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFEE_HELPER(8) +DEF_VFEE_HELPER(16) +DEF_VFEE_HELPER(32) + +#define DEF_VFEE_CC_HELPER(BITS) \ +void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFEE_CC_HELPER(8) +DEF_VFEE_CC_HELPER(16) +DEF_VFEE_CC_HELPER(32) + +static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0, a1, b0, b1, e0, e1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_inequal; + bool smaller = false; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = nonzero_search(a0 ^ b0, mask); + e1 = nonzero_search(a1 ^ b1, mask); + first_inequal = match_index(e0, e1); + + /* identify the smaller element */ + if (first_inequal < 16) { + uint8_t enr = first_inequal / (1 << es); + uint32_t a = s390_vec_read_element(v2, enr, es); + uint32_t b = s390_vec_read_element(v3, enr, es); + + smaller = a < b; + } + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + if (first_zero == 16 && first_inequal == 16) { + return 3; + } else if (first_zero < first_inequal) { + return 0; + } + return smaller ? 
1 : 2; +} + +#define DEF_VFENE_HELPER(BITS) \ +void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfene(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFENE_HELPER(8) +DEF_VFENE_HELPER(16) +DEF_VFENE_HELPER(32) + +#define DEF_VFENE_CC_HELPER(BITS) \ +void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFENE_CC_HELPER(8) +DEF_VFENE_CC_HELPER(16) +DEF_VFENE_CC_HELPER(32) + +static int vistr(void *v1, const void *v2, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0 = s390_vec_read_element64(v2, 0); + uint64_t a1 = s390_vec_read_element64(v2, 1); + uint64_t z; + int cc = 3; + + z = zero_search(a0, mask); + if (z) { + a0 &= ~(-1ull >> clz64(z)); + a1 = 0; + cc = 0; + } else { + z = zero_search(a1, mask); + if (z) { + a1 &= ~(-1ull >> clz64(z)); + cc = 0; + } + } + + s390_vec_write_element64(v1, 0, a0); + s390_vec_write_element64(v1, 1, a1); + return cc; +} + +#define DEF_VISTR_HELPER(BITS) \ +void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc) \ +{ \ + vistr(v1, v2, MO_##BITS); \ +} +DEF_VISTR_HELPER(8) +DEF_VISTR_HELPER(16) +DEF_VISTR_HELPER(32) + +#define DEF_VISTR_CC_HELPER(BITS) \ +void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \ + uint32_t desc) \ +{ \ + env->cc_op = vistr(v1, v2, MO_##BITS); \ +} +DEF_VISTR_CC_HELPER(8) +DEF_VISTR_CC_HELPER(16) +DEF_VISTR_CC_HELPER(32) + +static bool element_compare(uint32_t data, uint32_t l, uint8_t c) +{ + const bool equal = extract32(c, 7, 1); + const bool lower = extract32(c, 6, 1); + const bool higher = extract32(c, 5, 1); + + if (data < l) { + return lower; + } else if (data > l) { + return higher; + } + return equal; +} + +static int vstrc(void *v1, const void *v2, const void *v3, const void *v4, + bool in, bool rt, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0 = s390_vec_read_element64(v2, 0); + uint64_t a1 = s390_vec_read_element64(v2, 1); + int first_zero = 16, first_match = 16; + S390Vector rt_result = {}; + uint64_t z0, z1; + int i, j; + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + for (i = 0; i < 16 / (1 << es); i++) { + const uint32_t data = s390_vec_read_element(v2, i, es); + const int cur_byte = i * (1 << es); + bool any_match = false; + + /* if we don't need a bit vector, we can stop early */ + if (cur_byte == first_zero && !rt) { + break; + } + + for (j = 0; j < 16 / (1 << es); j += 2) { + const uint32_t l1 = s390_vec_read_element(v3, j, es); + const uint32_t l2 = s390_vec_read_element(v3, j + 1, es); + /* we are only interested in the highest byte of each element */ + const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es)); + const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es)); + + if (element_compare(data, l1, c1) && + element_compare(data, l2, c2)) { + any_match = true; + break; + } + } + /* invert the result if requested */ + any_match = in ^ any_match; + + if (any_match) { + /* indicate bit vector if requested */ + if (rt) { + const uint64_t val = -1ull; + + first_match = MIN(cur_byte, first_match); + s390_vec_write_element(&rt_result, i, es, val); + } else { + /* stop on the first match */ + first_match = cur_byte; + break; + } + } + } + + 
if (rt) {
+        *(S390Vector *)v1 = rt_result;
+    } else {
+        s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
+        s390_vec_write_element64(v1, 1, 0);
+    }
+
+    if (first_zero == 16 && first_match == 16) {
+        return 3; /* no match */
+    } else if (first_zero == 16) {
+        return 1; /* matching elements, no match for zero */
+    } else if (first_match < first_zero) {
+        return 2; /* matching elements before match for zero */
+    }
+    return 0; /* match for zero */
+}
+
+#define DEF_VSTRC_HELPER(BITS) \
+void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3, \
+                              const void *v4, uint32_t desc) \
+{ \
+    const bool in = extract32(simd_data(desc), 3, 1); \
+    const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+    vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
+}
+DEF_VSTRC_HELPER(8)
+DEF_VSTRC_HELPER(16)
+DEF_VSTRC_HELPER(32)
+
+#define DEF_VSTRC_RT_HELPER(BITS) \
+void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3, \
+                                 const void *v4, uint32_t desc) \
+{ \
+    const bool in = extract32(simd_data(desc), 3, 1); \
+    const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+    vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
+}
+DEF_VSTRC_RT_HELPER(8)
+DEF_VSTRC_RT_HELPER(16)
+DEF_VSTRC_RT_HELPER(32)
+
+#define DEF_VSTRC_CC_HELPER(BITS) \
+void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3, \
+                                 const void *v4, CPUS390XState *env, \
+                                 uint32_t desc) \
+{ \
+    const bool in = extract32(simd_data(desc), 3, 1); \
+    const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+    env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
+}
+DEF_VSTRC_CC_HELPER(8)
+DEF_VSTRC_CC_HELPER(16)
+DEF_VSTRC_CC_HELPER(32)
+
+#define DEF_VSTRC_CC_RT_HELPER(BITS) \
+void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \
+                                    const void *v4, CPUS390XState *env, \
+                                    uint32_t desc) \
+{ \
+    const bool in = extract32(simd_data(desc), 3, 1); \
+    const bool zs = extract32(simd_data(desc), 1, 1); \
+ \
+    env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
+}
+DEF_VSTRC_CC_RT_HELPER(8)
+DEF_VSTRC_CC_RT_HELPER(16)
+DEF_VSTRC_CC_RT_HELPER(32)
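+/*
+ * Closing note on the SWAR idiom shared by the string helpers above:
+ * zero_search() leaves the MSB of every element set iff that element is
+ * zero.  Worked through for byte elements (mask = 0x7f7f7f7f7f7f7f7f,
+ * input value chosen purely for illustration):
+ *
+ *   a                   = 0x4142004344454647  (byte 2 is zero)
+ *   (a & mask) + mask   = 0xc0c17fc2c3c4c5c6  (MSB clear only for the zero byte)
+ *   ... | a | mask      = 0xffff7fffffffffff
+ *   ~(...)              = 0x0000800000000000
+ *
+ * match_index() then converts the leftmost set bit into a byte offset:
+ * clz64(0x0000800000000000) >> 3 == 2.
+ */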