summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Maydell2020-05-14 17:17:55 +0200
committerPeter Maydell2020-05-14 17:17:55 +0200
commit013a18edbbc59cdad019100c7d03c0494642b74c (patch)
tree3728328ab803851eba7abda5a0c313638da2178d
parentMerge remote-tracking branch 'remotes/edgar/tags/edgar/xilinx-next-2020-05-14... (diff)
parenttarget/arm: Convert NEON VFMA, VFMS 3-reg-same insns to decodetree (diff)
downloadqemu-013a18edbbc59cdad019100c7d03c0494642b74c.tar.gz
qemu-013a18edbbc59cdad019100c7d03c0494642b74c.tar.xz
qemu-013a18edbbc59cdad019100c7d03c0494642b74c.zip
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200514' into staging
target-arm queue: * target/arm: Use correct GDB XML for M-profile cores * target/arm: Code cleanup to use gvec APIs better * aspeed: Add support for the sonorapass-bmc board * target/arm: Support reporting KVM host memory errors to the guest via ACPI notifications * target/arm: Finish conversion of Neon 3-reg-same insns to decodetree # gpg: Signature made Thu 14 May 2020 15:19:15 BST # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20200514: (45 commits) target/arm: Convert NEON VFMA, VFMS 3-reg-same insns to decodetree target/arm: Convert Neon fp VMAX/VMIN/VMAXNM/VMINNM/VRECPS/VRSQRTS to decodetree target/arm: Move 'env' argument of recps_f32 and rsqrts_f32 helpers to usual place target/arm: Convert Neon 3-reg-same compare insns to decodetree target/arm: Convert Neon fp VMUL, VMLA, VMLS 3-reg-same insns to decodetree target/arm: Convert Neon VPMIN/VPMAX/VPADD float 3-reg-same insns to decodetree target/arm: Convert Neon VADD, VSUB, VABD 3-reg-same insns to decodetree target/arm: Convert Neon VQDMULH/VQRDMULH 3-reg-same to decodetree target/arm: Convert Neon VPADD 3-reg-same insns to decodetree target/arm: Convert Neon VPMAX/VPMIN 3-reg-same insns to decodetree target/arm: Convert Neon VQSHL, VRSHL, VQRSHL 3-reg-same insns to decodetree target/arm: Convert Neon VRHADD, VHSUB 3-reg-same insns to decodetree target/arm: Convert Neon VABA/VABD 3-reg-same to decodetree target/arm: Convert Neon VHADD 3-reg-same insns target/arm: Convert Neon 64-bit element 3-reg-same insns target/arm: Convert Neon 3-reg-same SHA to decodetree target/arm: Convert Neon 3-reg-same VQRDMLAH/VQRDMLSH to decodetree MAINTAINERS: Add ACPI/HEST/GHES entries target-arm: kvm64: handle SIGBUS signal from kernel or KVM ACPI: Record Generic Error Status Block(GESB) table ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--MAINTAINERS9
-rw-r--r--accel/kvm/kvm-all.c36
-rwxr-xr-xconfigure4
-rw-r--r--default-configs/arm-softmmu.mak1
-rw-r--r--docs/specs/acpi_hest_ghes.rst110
-rw-r--r--docs/specs/index.rst1
-rw-r--r--gdb-xml/arm-m-profile.xml27
-rw-r--r--hw/acpi/Kconfig4
-rw-r--r--hw/acpi/Makefile.objs1
-rw-r--r--hw/acpi/aml-build.c2
-rw-r--r--hw/acpi/generic_event_device.c19
-rw-r--r--hw/acpi/ghes.c448
-rw-r--r--hw/acpi/nvdimm.c10
-rw-r--r--hw/arm/aspeed.c78
-rw-r--r--hw/arm/virt-acpi-build.c15
-rw-r--r--hw/arm/virt.c23
-rw-r--r--include/hw/acpi/aml-build.h1
-rw-r--r--include/hw/acpi/generic_event_device.h2
-rw-r--r--include/hw/acpi/ghes.h74
-rw-r--r--include/hw/arm/virt.h1
-rw-r--r--include/qemu/uuid.h27
-rw-r--r--include/sysemu/kvm.h3
-rw-r--r--include/sysemu/kvm_int.h12
-rw-r--r--target/arm/cpu.h4
-rw-r--r--target/arm/cpu_tcg.c1
-rw-r--r--target/arm/gdbstub.c22
-rw-r--r--target/arm/helper.c2
-rw-r--r--target/arm/helper.h78
-rw-r--r--target/arm/internals.h5
-rw-r--r--target/arm/kvm64.c77
-rw-r--r--target/arm/neon-dp.decode119
-rw-r--r--target/arm/neon_helper.c17
-rw-r--r--target/arm/tlb_helper.c2
-rw-r--r--target/arm/translate-a64.c210
-rw-r--r--target/arm/translate-neon.inc.c664
-rw-r--r--target/arm/translate.c2353
-rw-r--r--target/arm/translate.h84
-rw-r--r--target/arm/vec_helper.c240
-rw-r--r--target/arm/vfp_helper.c9
-rw-r--r--target/i386/cpu.h2
-rw-r--r--target/i386/kvm.c36
41 files changed, 3395 insertions, 1438 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index d11f3cb976..f02e290702 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1516,6 +1516,15 @@ F: tests/qtest/bios-tables-test.c
F: tests/qtest/acpi-utils.[hc]
F: tests/data/acpi/
+ACPI/HEST/GHES
+R: Dongjiu Geng <gengdongjiu@huawei.com>
+R: Xiang Zheng <zhengxiang9@huawei.com>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/acpi/ghes.c
+F: include/hw/acpi/ghes.h
+F: docs/specs/acpi_hest_ghes.rst
+
ppc4xx
M: David Gibson <david@gibson.dropbear.id.au>
L: qemu-ppc@nongnu.org
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 439a4efe52..36be11795d 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -44,6 +44,7 @@
#include "qapi/visitor.h"
#include "qapi/qapi-types-common.h"
#include "qapi/qapi-visit-common.h"
+#include "sysemu/reset.h"
#include "hw/boards.h"
@@ -883,6 +884,39 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension)
return ret;
}
+typedef struct HWPoisonPage {
+ ram_addr_t ram_addr;
+ QLIST_ENTRY(HWPoisonPage) list;
+} HWPoisonPage;
+
+static QLIST_HEAD(, HWPoisonPage) hwpoison_page_list =
+ QLIST_HEAD_INITIALIZER(hwpoison_page_list);
+
+static void kvm_unpoison_all(void *param)
+{
+ HWPoisonPage *page, *next_page;
+
+ QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
+ QLIST_REMOVE(page, list);
+ qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
+ g_free(page);
+ }
+}
+
+void kvm_hwpoison_page_add(ram_addr_t ram_addr)
+{
+ HWPoisonPage *page;
+
+ QLIST_FOREACH(page, &hwpoison_page_list, list) {
+ if (page->ram_addr == ram_addr) {
+ return;
+ }
+ }
+ page = g_new(HWPoisonPage, 1);
+ page->ram_addr = ram_addr;
+ QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
+}
+
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
{
#if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
@@ -2085,6 +2119,8 @@ static int kvm_init(MachineState *ms)
s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
}
+ qemu_register_reset(kvm_unpoison_all, NULL);
+
if (s->kernel_irqchip_allowed) {
kvm_irqchip_create(s);
}
diff --git a/configure b/configure
index c50c006b86..26084fc53a 100755
--- a/configure
+++ b/configure
@@ -7806,14 +7806,14 @@ case "$target_name" in
TARGET_SYSTBL_ABI=common,oabi
bflt="yes"
mttcg="yes"
- gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
+ gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml arm-m-profile.xml"
;;
aarch64|aarch64_be)
TARGET_ARCH=aarch64
TARGET_BASE_ARCH=arm
bflt="yes"
mttcg="yes"
- gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
+ gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml arm-m-profile.xml"
;;
cris)
;;
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index 36a0e89daa..8fc09a4a51 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -42,3 +42,4 @@ CONFIG_FSL_IMX7=y
CONFIG_FSL_IMX6UL=y
CONFIG_SEMIHOSTING=y
CONFIG_ALLWINNER_H3=y
+CONFIG_ACPI_APEI=y
diff --git a/docs/specs/acpi_hest_ghes.rst b/docs/specs/acpi_hest_ghes.rst
new file mode 100644
index 0000000000..68f1fbe0a4
--- /dev/null
+++ b/docs/specs/acpi_hest_ghes.rst
@@ -0,0 +1,110 @@
+APEI tables generating and CPER record
+======================================
+
+..
+ Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD.
+
+ This work is licensed under the terms of the GNU GPL, version 2 or later.
+ See the COPYING file in the top-level directory.
+
+Design Details
+--------------
+
+::
+
+ etc/acpi/tables etc/hardware_errors
+ ==================== ===============================
+ + +--------------------------+ +----------------------------+
+ | | HEST | +--------->| error_block_address1 |------+
+ | +--------------------------+ | +----------------------------+ |
+ | | GHES1 | | +------->| error_block_address2 |------+-+
+ | +--------------------------+ | | +----------------------------+ | |
+ | | ................. | | | | .............. | | |
+ | | error_status_address-----+-+ | -----------------------------+ | |
+ | | ................. | | +--->| error_block_addressN |------+-+---+
+ | | read_ack_register--------+-+ | | +----------------------------+ | | |
+ | | read_ack_preserve | +-+---+--->| read_ack_register1 | | | |
+ | | read_ack_write | | | +----------------------------+ | | |
+ + +--------------------------+ | +-+--->| read_ack_register2 | | | |
+ | | GHES2 | | | | +----------------------------+ | | |
+ + +--------------------------+ | | | | ............. | | | |
+ | | ................. | | | | +----------------------------+ | | |
+ | | error_status_address-----+---+ | | +->| read_ack_registerN | | | |
+ | | ................. | | | | +----------------------------+ | | |
+ | | read_ack_register--------+-----+ | | |Generic Error Status Block 1|<-----+ | |
+ | | read_ack_preserve | | | |-+------------------------+-+ | |
+ | | read_ack_write | | | | | CPER | | | |
+ + +--------------------------| | | | | CPER | | | |
+ | | ............... | | | | | .... | | | |
+ + +--------------------------+ | | | | CPER | | | |
+ | | GHESN | | | |-+------------------------+-| | |
+ + +--------------------------+ | | |Generic Error Status Block 2|<-------+ |
+ | | ................. | | | |-+------------------------+-+ |
+ | | error_status_address-----+-------+ | | | CPER | | |
+ | | ................. | | | | CPER | | |
+ | | read_ack_register--------+---------+ | | .... | | |
+ | | read_ack_preserve | | | CPER | | |
+ | | read_ack_write | +-+------------------------+-+ |
+ + +--------------------------+ | .......... | |
+ |----------------------------+ |
+ |Generic Error Status Block N |<----------+
+ |-+-------------------------+-+
+ | | CPER | |
+ | | CPER | |
+ | | .... | |
+ | | CPER | |
+ +-+-------------------------+-+
+
+
+(1) QEMU generates the ACPI HEST table. This table goes in the current
+ "etc/acpi/tables" fw_cfg blob. Each error source has different
+ notification types.
+
+(2) A new fw_cfg blob called "etc/hardware_errors" is introduced. QEMU
+ also needs to populate this blob. The "etc/hardware_errors" fw_cfg blob
+ contains an address registers table and an Error Status Data Block table.
+
+(3) The address registers table contains N Error Block Address entries
+ and N Read Ack Register entries. The size for each entry is 8-byte.
+ The Error Status Data Block table contains N Error Status Data Block
+ entries. The size for each entry is 4096(0x1000) bytes. The total size
+ for the "etc/hardware_errors" fw_cfg blob is (N * 8 * 2 + N * 4096) bytes.
+ N is the number of the kinds of hardware error sources.
+
+(4) QEMU generates the ACPI linker/loader script for the firmware. The
+ firmware pre-allocates memory for "etc/acpi/tables", "etc/hardware_errors"
+ and copies blob contents there.
+
+(5) QEMU generates N ADD_POINTER commands, which patch addresses in the
+ "error_status_address" fields of the HEST table with a pointer to the
+ corresponding "address registers" in the "etc/hardware_errors" blob.
+
+(6) QEMU generates N ADD_POINTER commands, which patch addresses in the
+ "read_ack_register" fields of the HEST table with a pointer to the
+ corresponding "read_ack_register" within the "etc/hardware_errors" blob.
+
+(7) QEMU generates N ADD_POINTER commands for the firmware, which patch
+ addresses in the "error_block_address" fields with a pointer to the
+ respective "Error Status Data Block" in the "etc/hardware_errors" blob.
+
+(8) QEMU defines a third and write-only fw_cfg blob which is called
+ "etc/hardware_errors_addr". Through that blob, the firmware can send back
+ the guest-side allocation addresses to QEMU. The "etc/hardware_errors_addr"
+ blob contains a 8-byte entry. QEMU generates a single WRITE_POINTER command
+ for the firmware. The firmware will write back the start address of
+ "etc/hardware_errors" blob to the fw_cfg file "etc/hardware_errors_addr".
+
+(9) When QEMU gets a SIGBUS from the kernel, QEMU writes CPER into corresponding
+ "Error Status Data Block", guest memory, and then injects platform specific
+ interrupt (in case of arm/virt machine it's Synchronous External Abort) as a
+ notification which is necessary for notifying the guest.
+
+(10) This notification (in virtual hardware) will be handled by the guest
+ kernel, on receiving notification, guest APEI driver could read the CPER error
+ and take appropriate action.
+
+(11) kvm_arch_on_sigbus_vcpu() uses source_id as index in "etc/hardware_errors" to
+ find out "Error Status Data Block" entry corresponding to error source. So supported
+ source_id values should be assigned here and not be changed afterwards to make sure
+ that guest will write error into expected "Error Status Data Block" even if guest was
+ migrated to a newer QEMU.
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
index de46a8b5e7..426632a475 100644
--- a/docs/specs/index.rst
+++ b/docs/specs/index.rst
@@ -14,3 +14,4 @@ Contents:
ppc-spapr-xive
acpi_hw_reduced_hotplug
tpm
+ acpi_hest_ghes
diff --git a/gdb-xml/arm-m-profile.xml b/gdb-xml/arm-m-profile.xml
new file mode 100644
index 0000000000..5319d764ee
--- /dev/null
+++ b/gdb-xml/arm-m-profile.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2020 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.arm.m-profile">
+ <reg name="r0" bitsize="32"/>
+ <reg name="r1" bitsize="32"/>
+ <reg name="r2" bitsize="32"/>
+ <reg name="r3" bitsize="32"/>
+ <reg name="r4" bitsize="32"/>
+ <reg name="r5" bitsize="32"/>
+ <reg name="r6" bitsize="32"/>
+ <reg name="r7" bitsize="32"/>
+ <reg name="r8" bitsize="32"/>
+ <reg name="r9" bitsize="32"/>
+ <reg name="r10" bitsize="32"/>
+ <reg name="r11" bitsize="32"/>
+ <reg name="r12" bitsize="32"/>
+ <reg name="sp" bitsize="32" type="data_ptr"/>
+ <reg name="lr" bitsize="32"/>
+ <reg name="pc" bitsize="32" type="code_ptr"/>
+ <reg name="xpsr" bitsize="32" regnum="25"/>
+</feature>
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 54209c6f2f..1932f66af8 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -28,6 +28,10 @@ config ACPI_HMAT
bool
depends on ACPI
+config ACPI_APEI
+ bool
+ depends on ACPI
+
config ACPI_PCI
bool
depends on ACPI && PCI
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index cab9bcd457..72886c7965 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -8,6 +8,7 @@ common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o
common-obj-$(CONFIG_ACPI_HMAT) += hmat.o
+common-obj-$(CONFIG_ACPI_APEI) += ghes.o
common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
common-obj-$(call lnot,$(CONFIG_PC)) += acpi-x86-stub.o
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 2c3702b882..3681ec6e3d 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1578,6 +1578,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables)
tables->table_data = g_array_new(false, true /* clear */, 1);
tables->tcpalog = g_array_new(false, true /* clear */, 1);
tables->vmgenid = g_array_new(false, true /* clear */, 1);
+ tables->hardware_errors = g_array_new(false, true /* clear */, 1);
tables->linker = bios_linker_loader_init();
}
@@ -1588,6 +1589,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
g_array_free(tables->table_data, true);
g_array_free(tables->tcpalog, mfre);
g_array_free(tables->vmgenid, mfre);
+ g_array_free(tables->hardware_errors, mfre);
}
/*
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 5d17f78a1e..b1cbdd86b6 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -247,6 +247,24 @@ static const VMStateDescription vmstate_ged_state = {
}
};
+static bool ghes_needed(void *opaque)
+{
+ AcpiGedState *s = opaque;
+ return s->ghes_state.ghes_addr_le;
+}
+
+static const VMStateDescription vmstate_ghes_state = {
+ .name = "acpi-ged/ghes",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = ghes_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
+ vmstate_ghes_state, AcpiGhesState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_acpi_ged = {
.name = "acpi-ged",
.version_id = 1,
@@ -257,6 +275,7 @@ static const VMStateDescription vmstate_acpi_ged = {
},
.subsections = (const VMStateDescription * []) {
&vmstate_memhp_state,
+ &vmstate_ghes_state,
NULL
}
};
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
new file mode 100644
index 0000000000..b363bc331d
--- /dev/null
+++ b/hw/acpi/ghes.c
@@ -0,0 +1,448 @@
+/*
+ * Support for generating APEI tables and recording CPER for Guests
+ *
+ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Author: Dongjiu Geng <gengdongjiu@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "hw/acpi/ghes.h"
+#include "hw/acpi/aml-build.h"
+#include "qemu/error-report.h"
+#include "hw/acpi/generic_event_device.h"
+#include "hw/nvram/fw_cfg.h"
+#include "qemu/uuid.h"
+
+#define ACPI_GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors"
+#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr"
+
+/* The max size in bytes for one error block */
+#define ACPI_GHES_MAX_RAW_DATA_LENGTH (1 * KiB)
+
+/* Now only support ARMv8 SEA notification type error source */
+#define ACPI_GHES_ERROR_SOURCE_COUNT 1
+
+/* Generic Hardware Error Source version 2 */
+#define ACPI_GHES_SOURCE_GENERIC_ERROR_V2 10
+
+/* Address offset in Generic Address Structure(GAS) */
+#define GAS_ADDR_OFFSET 4
+
+/*
+ * The total size of Generic Error Data Entry
+ * ACPI 6.1/6.2: 18.3.2.7.1 Generic Error Data,
+ * Table 18-343 Generic Error Data Entry
+ */
+#define ACPI_GHES_DATA_LENGTH 72
+
+/* The memory section CPER size, UEFI 2.6: N.2.5 Memory Error Section */
+#define ACPI_GHES_MEM_CPER_LENGTH 80
+
+/* Masks for block_status flags */
+#define ACPI_GEBS_UNCORRECTABLE 1
+
+/*
+ * Total size for Generic Error Status Block except Generic Error Data Entries
+ * ACPI 6.2: 18.3.2.7.1 Generic Error Data,
+ * Table 18-380 Generic Error Status Block
+ */
+#define ACPI_GHES_GESB_SIZE 20
+
+/*
+ * Values for error_severity field
+ */
+enum AcpiGenericErrorSeverity {
+ ACPI_CPER_SEV_RECOVERABLE = 0,
+ ACPI_CPER_SEV_FATAL = 1,
+ ACPI_CPER_SEV_CORRECTED = 2,
+ ACPI_CPER_SEV_NONE = 3,
+};
+
+/*
+ * Hardware Error Notification
+ * ACPI 4.0: 17.3.2.7 Hardware Error Notification
+ * Composes dummy Hardware Error Notification descriptor of specified type
+ */
+static void build_ghes_hw_error_notification(GArray *table, const uint8_t type)
+{
+ /* Type */
+ build_append_int_noprefix(table, type, 1);
+ /*
+ * Length:
+ * Total length of the structure in bytes
+ */
+ build_append_int_noprefix(table, 28, 1);
+ /* Configuration Write Enable */
+ build_append_int_noprefix(table, 0, 2);
+ /* Poll Interval */
+ build_append_int_noprefix(table, 0, 4);
+ /* Vector */
+ build_append_int_noprefix(table, 0, 4);
+ /* Switch To Polling Threshold Value */
+ build_append_int_noprefix(table, 0, 4);
+ /* Switch To Polling Threshold Window */
+ build_append_int_noprefix(table, 0, 4);
+ /* Error Threshold Value */
+ build_append_int_noprefix(table, 0, 4);
+ /* Error Threshold Window */
+ build_append_int_noprefix(table, 0, 4);
+}
+
+/*
+ * Generic Error Data Entry
+ * ACPI 6.1: 18.3.2.7.1 Generic Error Data
+ */
+static void acpi_ghes_generic_error_data(GArray *table,
+ const uint8_t *section_type, uint32_t error_severity,
+ uint8_t validation_bits, uint8_t flags,
+ uint32_t error_data_length, QemuUUID fru_id,
+ uint64_t time_stamp)
+{
+ const uint8_t fru_text[20] = {0};
+
+ /* Section Type */
+ g_array_append_vals(table, section_type, 16);
+
+ /* Error Severity */
+ build_append_int_noprefix(table, error_severity, 4);
+ /* Revision */
+ build_append_int_noprefix(table, 0x300, 2);
+ /* Validation Bits */
+ build_append_int_noprefix(table, validation_bits, 1);
+ /* Flags */
+ build_append_int_noprefix(table, flags, 1);
+ /* Error Data Length */
+ build_append_int_noprefix(table, error_data_length, 4);
+
+ /* FRU Id */
+ g_array_append_vals(table, fru_id.data, ARRAY_SIZE(fru_id.data));
+
+ /* FRU Text */
+ g_array_append_vals(table, fru_text, sizeof(fru_text));
+
+ /* Timestamp */
+ build_append_int_noprefix(table, time_stamp, 8);
+}
+
+/*
+ * Generic Error Status Block
+ * ACPI 6.1: 18.3.2.7.1 Generic Error Data
+ */
+static void acpi_ghes_generic_error_status(GArray *table, uint32_t block_status,
+ uint32_t raw_data_offset, uint32_t raw_data_length,
+ uint32_t data_length, uint32_t error_severity)
+{
+ /* Block Status */
+ build_append_int_noprefix(table, block_status, 4);
+ /* Raw Data Offset */
+ build_append_int_noprefix(table, raw_data_offset, 4);
+ /* Raw Data Length */
+ build_append_int_noprefix(table, raw_data_length, 4);
+ /* Data Length */
+ build_append_int_noprefix(table, data_length, 4);
+ /* Error Severity */
+ build_append_int_noprefix(table, error_severity, 4);
+}
+
+/* UEFI 2.6: N.2.5 Memory Error Section */
+static void acpi_ghes_build_append_mem_cper(GArray *table,
+ uint64_t error_physical_addr)
+{
+ /*
+ * Memory Error Record
+ */
+
+ /* Validation Bits */
+ build_append_int_noprefix(table,
+ (1ULL << 14) | /* Type Valid */
+ (1ULL << 1) /* Physical Address Valid */,
+ 8);
+ /* Error Status */
+ build_append_int_noprefix(table, 0, 8);
+ /* Physical Address */
+ build_append_int_noprefix(table, error_physical_addr, 8);
+ /* Skip all the detailed information normally found in such a record */
+ build_append_int_noprefix(table, 0, 48);
+ /* Memory Error Type */
+ build_append_int_noprefix(table, 0 /* Unknown error */, 1);
+ /* Skip all the detailed information normally found in such a record */
+ build_append_int_noprefix(table, 0, 7);
+}
+
+static int acpi_ghes_record_mem_error(uint64_t error_block_address,
+ uint64_t error_physical_addr)
+{
+ GArray *block;
+
+ /* Memory Error Section Type */
+ const uint8_t uefi_cper_mem_sec[] =
+ UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \
+ 0xED, 0x7C, 0x83, 0xB1);
+
+ /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data,
+ * Table 17-13 Generic Error Data Entry
+ */
+ QemuUUID fru_id = {};
+ uint32_t data_length;
+
+ block = g_array_new(false, true /* clear */, 1);
+
+ /* This is the length if adding a new generic error data entry*/
+ data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH;
+
+ /*
+ * Check whether it will run out of the preallocated memory if adding a new
+ * generic error data entry
+ */
+ if ((data_length + ACPI_GHES_GESB_SIZE) > ACPI_GHES_MAX_RAW_DATA_LENGTH) {
+ error_report("Not enough memory to record new CPER!!!");
+ g_array_free(block, true);
+ return -1;
+ }
+
+ /* Build the new generic error status block header */
+ acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE,
+ 0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE);
+
+ /* Build this new generic error data entry header */
+ acpi_ghes_generic_error_data(block, uefi_cper_mem_sec,
+ ACPI_CPER_SEV_RECOVERABLE, 0, 0,
+ ACPI_GHES_MEM_CPER_LENGTH, fru_id, 0);
+
+ /* Build the memory section CPER for above new generic error data entry */
+ acpi_ghes_build_append_mem_cper(block, error_physical_addr);
+
+ /* Write the generic error data entry into guest memory */
+ cpu_physical_memory_write(error_block_address, block->data, block->len);
+
+ g_array_free(block, true);
+
+ return 0;
+}
+
+/*
+ * Build table for the hardware error fw_cfg blob.
+ * Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs.
+ * See docs/specs/acpi_hest_ghes.rst for blobs format.
+ */
+void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
+{
+ int i, error_status_block_offset;
+
+ /* Build error_block_address */
+ for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
+ build_append_int_noprefix(hardware_errors, 0, sizeof(uint64_t));
+ }
+
+ /* Build read_ack_register */
+ for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
+ /*
+ * Initialize the value of read_ack_register to 1, so GHES can be
+ * writeable after (re)boot.
+ * ACPI 6.2: 18.3.2.8 Generic Hardware Error Source version 2
+ * (GHESv2 - Type 10)
+ */
+ build_append_int_noprefix(hardware_errors, 1, sizeof(uint64_t));
+ }
+
+ /* Generic Error Status Block offset in the hardware error fw_cfg blob */
+ error_status_block_offset = hardware_errors->len;
+
+ /* Reserve space for Error Status Data Block */
+ acpi_data_push(hardware_errors,
+ ACPI_GHES_MAX_RAW_DATA_LENGTH * ACPI_GHES_ERROR_SOURCE_COUNT);
+
+ /* Tell guest firmware to place hardware_errors blob into RAM */
+ bios_linker_loader_alloc(linker, ACPI_GHES_ERRORS_FW_CFG_FILE,
+ hardware_errors, sizeof(uint64_t), false);
+
+ for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
+ /*
+ * Tell firmware to patch error_block_address entries to point to
+ * corresponding "Generic Error Status Block"
+ */
+ bios_linker_loader_add_pointer(linker,
+ ACPI_GHES_ERRORS_FW_CFG_FILE, sizeof(uint64_t) * i,
+ sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE,
+ error_status_block_offset + i * ACPI_GHES_MAX_RAW_DATA_LENGTH);
+ }
+
+ /*
+ * tell firmware to write hardware_errors GPA into
+ * hardware_errors_addr fw_cfg, once the former has been initialized.
+ */
+ bios_linker_loader_write_pointer(linker, ACPI_GHES_DATA_ADDR_FW_CFG_FILE,
+ 0, sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, 0);
+}
+
+/* Build Generic Hardware Error Source version 2 (GHESv2) */
+static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker)
+{
+ uint64_t address_offset;
+ /*
+ * Type:
+ * Generic Hardware Error Source version 2(GHESv2 - Type 10)
+ */
+ build_append_int_noprefix(table_data, ACPI_GHES_SOURCE_GENERIC_ERROR_V2, 2);
+ /* Source Id */
+ build_append_int_noprefix(table_data, source_id, 2);
+ /* Related Source Id */
+ build_append_int_noprefix(table_data, 0xffff, 2);
+ /* Flags */
+ build_append_int_noprefix(table_data, 0, 1);
+ /* Enabled */
+ build_append_int_noprefix(table_data, 1, 1);
+
+ /* Number of Records To Pre-allocate */
+ build_append_int_noprefix(table_data, 1, 4);
+ /* Max Sections Per Record */
+ build_append_int_noprefix(table_data, 1, 4);
+ /* Max Raw Data Length */
+ build_append_int_noprefix(table_data, ACPI_GHES_MAX_RAW_DATA_LENGTH, 4);
+
+ address_offset = table_data->len;
+ /* Error Status Address */
+ build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0,
+ 4 /* QWord access */, 0);
+ bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,
+ address_offset + GAS_ADDR_OFFSET, sizeof(uint64_t),
+ ACPI_GHES_ERRORS_FW_CFG_FILE, source_id * sizeof(uint64_t));
+
+ switch (source_id) {
+ case ACPI_HEST_SRC_ID_SEA:
+ /*
+ * Notification Structure
+ * Now only enable ARMv8 SEA notification type
+ */
+ build_ghes_hw_error_notification(table_data, ACPI_GHES_NOTIFY_SEA);
+ break;
+ default:
+ error_report("Not support this error source");
+ abort();
+ }
+
+ /* Error Status Block Length */
+ build_append_int_noprefix(table_data, ACPI_GHES_MAX_RAW_DATA_LENGTH, 4);
+
+ /*
+ * Read Ack Register
+ * ACPI 6.1: 18.3.2.8 Generic Hardware Error Source
+ * version 2 (GHESv2 - Type 10)
+ */
+ address_offset = table_data->len;
+ build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0,
+ 4 /* QWord access */, 0);
+ bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,
+ address_offset + GAS_ADDR_OFFSET,
+ sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE,
+ (ACPI_GHES_ERROR_SOURCE_COUNT + source_id) * sizeof(uint64_t));
+
+ /*
+ * Read Ack Preserve field
+ * We only provide the first bit in Read Ack Register to OSPM to write
+ * while the other bits are preserved.
+ */
+ build_append_int_noprefix(table_data, ~0x1ULL, 8);
+ /* Read Ack Write */
+ build_append_int_noprefix(table_data, 0x1, 8);
+}
+
+/* Build Hardware Error Source Table */
+void acpi_build_hest(GArray *table_data, BIOSLinker *linker)
+{
+ uint64_t hest_start = table_data->len;
+
+ /* Hardware Error Source Table header*/
+ acpi_data_push(table_data, sizeof(AcpiTableHeader));
+
+ /* Error Source Count */
+ build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4);
+
+ build_ghes_v2(table_data, ACPI_HEST_SRC_ID_SEA, linker);
+
+ build_header(linker, table_data, (void *)(table_data->data + hest_start),
+ "HEST", table_data->len - hest_start, 1, NULL, NULL);
+}
+
+void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
+ GArray *hardware_error)
+{
+ /* Create a read-only fw_cfg file for GHES */
+ fw_cfg_add_file(s, ACPI_GHES_ERRORS_FW_CFG_FILE, hardware_error->data,
+ hardware_error->len);
+
+ /* Create a read-write fw_cfg file for Address */
+ fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL,
+ NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false);
+}
+
+int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
+{
+ uint64_t error_block_addr, read_ack_register_addr, read_ack_register = 0;
+ uint64_t start_addr;
+ bool ret = -1;
+ AcpiGedState *acpi_ged_state;
+ AcpiGhesState *ags;
+
+ assert(source_id < ACPI_HEST_SRC_ID_RESERVED);
+
+ acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
+ NULL));
+ g_assert(acpi_ged_state);
+ ags = &acpi_ged_state->ghes_state;
+
+ start_addr = le64_to_cpu(ags->ghes_addr_le);
+
+ if (physical_address) {
+
+ if (source_id < ACPI_HEST_SRC_ID_RESERVED) {
+ start_addr += source_id * sizeof(uint64_t);
+ }
+
+ cpu_physical_memory_read(start_addr, &error_block_addr,
+ sizeof(error_block_addr));
+
+ error_block_addr = le64_to_cpu(error_block_addr);
+
+ read_ack_register_addr = start_addr +
+ ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t);
+
+ cpu_physical_memory_read(read_ack_register_addr,
+ &read_ack_register, sizeof(read_ack_register));
+
+ /* zero means OSPM does not acknowledge the error */
+ if (!read_ack_register) {
+ error_report("OSPM does not acknowledge previous error,"
+ " so can not record CPER for current error anymore");
+ } else if (error_block_addr) {
+ read_ack_register = cpu_to_le64(0);
+ /*
+ * Clear the Read Ack Register, OSPM will write it to 1 when
+ * it acknowledges this error.
+ */
+ cpu_physical_memory_write(read_ack_register_addr,
+ &read_ack_register, sizeof(uint64_t));
+
+ ret = acpi_ghes_record_mem_error(error_block_addr,
+ physical_address);
+ } else
+ error_report("can not find Generic Error Status Block");
+ }
+
+ return ret;
+}
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index fa7bf8b507..9316d12b70 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -27,6 +27,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/uuid.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/aml-build.h"
#include "hw/acpi/bios-linker-loader.h"
@@ -34,18 +35,13 @@
#include "hw/mem/nvdimm.h"
#include "qemu/nvdimm-utils.h"
-#define NVDIMM_UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
- { (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
- (b) & 0xff, ((b) >> 8) & 0xff, (c) & 0xff, ((c) >> 8) & 0xff, \
- (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }
-
/*
* define Byte Addressable Persistent Memory (PM) Region according to
* ACPI 6.0: 5.2.25.1 System Physical Address Range Structure.
*/
static const uint8_t nvdimm_nfit_spa_uuid[] =
- NVDIMM_UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d, 0x33,
- 0x18, 0xb7, 0x8c, 0xdb);
+ UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d, 0x33,
+ 0x18, 0xb7, 0x8c, 0xdb);
/*
* NVDIMM Firmware Interface Table
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 1eacb2fc17..4d57d1e436 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -73,6 +73,21 @@ struct AspeedBoardState {
SCU_AST2500_HW_STRAP_ACPI_ENABLE | \
SCU_HW_STRAP_SPI_MODE(SCU_HW_STRAP_SPI_MASTER))
+/* Sonorapass hardware value: 0xF100D216 */
+#define SONORAPASS_BMC_HW_STRAP1 ( \
+ SCU_AST2500_HW_STRAP_SPI_AUTOFETCH_ENABLE | \
+ SCU_AST2500_HW_STRAP_GPIO_STRAP_ENABLE | \
+ SCU_AST2500_HW_STRAP_UART_DEBUG | \
+ SCU_AST2500_HW_STRAP_RESERVED28 | \
+ SCU_AST2500_HW_STRAP_DDR4_ENABLE | \
+ SCU_HW_STRAP_VGA_CLASS_CODE | \
+ SCU_HW_STRAP_LPC_RESET_PIN | \
+ SCU_HW_STRAP_SPI_MODE(SCU_HW_STRAP_SPI_MASTER) | \
+ SCU_AST2500_HW_STRAP_SET_AXI_AHB_RATIO(AXI_AHB_RATIO_2_1) | \
+ SCU_HW_STRAP_VGA_BIOS_ROM | \
+ SCU_HW_STRAP_VGA_SIZE_SET(VGA_16M_DRAM) | \
+ SCU_AST2500_HW_STRAP_RESERVED1)
+
/* Swift hardware value: 0xF11AD206 */
#define SWIFT_BMC_HW_STRAP1 ( \
AST2500_HW_STRAP1_DEFAULTS | \
@@ -437,6 +452,50 @@ static void swift_bmc_i2c_init(AspeedBoardState *bmc)
i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 12), "tmp105", 0x4a);
}
+static void sonorapass_bmc_i2c_init(AspeedBoardState *bmc)
+{
+ AspeedSoCState *soc = &bmc->soc;
+
+ /* bus 2 : */
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 2), "tmp105", 0x48);
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 2), "tmp105", 0x49);
+ /* bus 2 : pca9546 @ 0x73 */
+
+ /* bus 3 : pca9548 @ 0x70 */
+
+ /* bus 4 : */
+ uint8_t *eeprom4_54 = g_malloc0(8 * 1024);
+ smbus_eeprom_init_one(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 4), 0x54,
+ eeprom4_54);
+ /* PCA9539 @ 0x76, but PCA9552 is compatible */
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 4), "pca9552", 0x76);
+ /* PCA9539 @ 0x77, but PCA9552 is compatible */
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 4), "pca9552", 0x77);
+
+ /* bus 6 : */
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 6), "tmp105", 0x48);
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 6), "tmp105", 0x49);
+ /* bus 6 : pca9546 @ 0x73 */
+
+ /* bus 8 : */
+ uint8_t *eeprom8_56 = g_malloc0(8 * 1024);
+ smbus_eeprom_init_one(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 8), 0x56,
+ eeprom8_56);
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 8), "pca9552", 0x60);
+ i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 8), "pca9552", 0x61);
+ /* bus 8 : adc128d818 @ 0x1d */
+ /* bus 8 : adc128d818 @ 0x1f */
+
+ /*
+ * bus 13 : pca9548 @ 0x71
+ * - channel 3:
+ * - tmm421 @ 0x4c
+ * - tmp421 @ 0x4e
+ * - tmp421 @ 0x4f
+ */
+
+}
+
static void witherspoon_bmc_i2c_init(AspeedBoardState *bmc)
{
AspeedSoCState *soc = &bmc->soc;
@@ -549,6 +608,21 @@ static void aspeed_machine_romulus_class_init(ObjectClass *oc, void *data)
mc->default_ram_size = 512 * MiB;
};
+static void aspeed_machine_sonorapass_class_init(ObjectClass *oc, void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+ mc->desc = "OCP SonoraPass BMC (ARM1176)";
+ amc->soc_name = "ast2500-a1";
+ amc->hw_strap1 = SONORAPASS_BMC_HW_STRAP1;
+ amc->fmc_model = "mx66l1g45g";
+ amc->spi_model = "mx66l1g45g";
+ amc->num_cs = 2;
+ amc->i2c_init = sonorapass_bmc_i2c_init;
+ mc->default_ram_size = 512 * MiB;
+};
+
static void aspeed_machine_swift_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -629,6 +703,10 @@ static const TypeInfo aspeed_machine_types[] = {
.parent = TYPE_ASPEED_MACHINE,
.class_init = aspeed_machine_swift_class_init,
}, {
+ .name = MACHINE_TYPE_NAME("sonorapass-bmc"),
+ .parent = TYPE_ASPEED_MACHINE,
+ .class_init = aspeed_machine_sonorapass_class_init,
+ }, {
.name = MACHINE_TYPE_NAME("witherspoon-bmc"),
.parent = TYPE_ASPEED_MACHINE,
.class_init = aspeed_machine_witherspoon_class_init,
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index f22b1e6097..1b0a584c7b 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -49,6 +49,7 @@
#include "sysemu/reset.h"
#include "kvm_arm.h"
#include "migration/vmstate.h"
+#include "hw/acpi/ghes.h"
#define ARM_SPI_BASE 32
@@ -818,6 +819,12 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
acpi_add_table(table_offsets, tables_blob);
build_spcr(tables_blob, tables->linker, vms);
+ if (vms->ras) {
+ build_ghes_error_table(tables->hardware_errors, tables->linker);
+ acpi_add_table(table_offsets, tables_blob);
+ acpi_build_hest(tables_blob, tables->linker);
+ }
+
if (ms->numa_state->num_nodes > 0) {
acpi_add_table(table_offsets, tables_blob);
build_srat(tables_blob, tables->linker, vms);
@@ -910,6 +917,7 @@ void virt_acpi_setup(VirtMachineState *vms)
{
AcpiBuildTables tables;
AcpiBuildState *build_state;
+ AcpiGedState *acpi_ged_state;
if (!vms->fw_cfg) {
trace_virt_acpi_setup();
@@ -940,6 +948,13 @@ void virt_acpi_setup(VirtMachineState *vms)
fw_cfg_add_file(vms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data,
acpi_data_len(tables.tcpalog));
+ if (vms->ras) {
+ assert(vms->acpi_dev);
+ acpi_ged_state = ACPI_GED(vms->acpi_dev);
+ acpi_ghes_add_fw_cfg(&acpi_ged_state->ghes_state,
+ vms->fw_cfg, tables.hardware_errors);
+ }
+
build_state->rsdp_mr = acpi_add_rom_blob(virt_acpi_build_update,
build_state, tables.rsdp,
ACPI_BUILD_RSDP_FILE, 0);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 634db0cfe9..9e76fa7b01 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1995,6 +1995,20 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name,
visit_type_OnOffAuto(v, name, &vms->acpi, errp);
}
+static bool virt_get_ras(Object *obj, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ return vms->ras;
+}
+
+static void virt_set_ras(Object *obj, bool value, Error **errp)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+
+ vms->ras = value;
+}
+
static char *virt_get_gic_version(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2327,6 +2341,15 @@ static void virt_instance_init(Object *obj)
"Valid values are none and smmuv3",
NULL);
+ /* Default disallows RAS instantiation */
+ vms->ras = false;
+ object_property_add_bool(obj, "ras", virt_get_ras,
+ virt_set_ras, NULL);
+ object_property_set_description(obj, "ras",
+ "Set on/off to enable/disable reporting host memory errors "
+ "to a KVM guest using ACPI and guest external abort exceptions",
+ NULL);
+
vms->irqmap = a15irqmap;
virt_flash_create(vms);
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 1539fe0667..ed7c89309e 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -220,6 +220,7 @@ struct AcpiBuildTables {
GArray *rsdp;
GArray *tcpalog;
GArray *vmgenid;
+ GArray *hardware_errors;
BIOSLinker *linker;
} AcpiBuildTables;
diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h
index 9eb86ca4fd..83917de024 100644
--- a/include/hw/acpi/generic_event_device.h
+++ b/include/hw/acpi/generic_event_device.h
@@ -61,6 +61,7 @@
#include "hw/sysbus.h"
#include "hw/acpi/memory_hotplug.h"
+#include "hw/acpi/ghes.h"
#define ACPI_POWER_BUTTON_DEVICE "PWRB"
@@ -96,6 +97,7 @@ typedef struct AcpiGedState {
GEDState ged_state;
uint32_t ged_event_bitmap;
qemu_irq irq;
+ AcpiGhesState ghes_state;
} AcpiGedState;
void build_ged_aml(Aml *table, const char* name, HotplugHandler *hotplug_dev,
diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
new file mode 100644
index 0000000000..4ad025e09a
--- /dev/null
+++ b/include/hw/acpi/ghes.h
@@ -0,0 +1,74 @@
+/*
+ * Support for generating APEI tables and recording CPER for Guests
+ *
+ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Author: Dongjiu Geng <gengdongjiu@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef ACPI_GHES_H
+#define ACPI_GHES_H
+
+#include "hw/acpi/bios-linker-loader.h"
+
+/*
+ * Values for Hardware Error Notification Type field
+ */
+enum AcpiGhesNotifyType {
+ /* Polled */
+ ACPI_GHES_NOTIFY_POLLED = 0,
+ /* External Interrupt */
+ ACPI_GHES_NOTIFY_EXTERNAL = 1,
+ /* Local Interrupt */
+ ACPI_GHES_NOTIFY_LOCAL = 2,
+ /* SCI */
+ ACPI_GHES_NOTIFY_SCI = 3,
+ /* NMI */
+ ACPI_GHES_NOTIFY_NMI = 4,
+ /* CMCI, ACPI 5.0: 18.3.2.7, Table 18-290 */
+ ACPI_GHES_NOTIFY_CMCI = 5,
+ /* MCE, ACPI 5.0: 18.3.2.7, Table 18-290 */
+ ACPI_GHES_NOTIFY_MCE = 6,
+ /* GPIO-Signal, ACPI 6.0: 18.3.2.7, Table 18-332 */
+ ACPI_GHES_NOTIFY_GPIO = 7,
+ /* ARMv8 SEA, ACPI 6.1: 18.3.2.9, Table 18-345 */
+ ACPI_GHES_NOTIFY_SEA = 8,
+ /* ARMv8 SEI, ACPI 6.1: 18.3.2.9, Table 18-345 */
+ ACPI_GHES_NOTIFY_SEI = 9,
+ /* External Interrupt - GSIV, ACPI 6.1: 18.3.2.9, Table 18-345 */
+ ACPI_GHES_NOTIFY_GSIV = 10,
+ /* Software Delegated Exception, ACPI 6.2: 18.3.2.9, Table 18-383 */
+ ACPI_GHES_NOTIFY_SDEI = 11,
+ /* 12 and greater are reserved */
+ ACPI_GHES_NOTIFY_RESERVED = 12
+};
+
+enum {
+ ACPI_HEST_SRC_ID_SEA = 0,
+ /* future ids go here */
+ ACPI_HEST_SRC_ID_RESERVED,
+};
+
+typedef struct AcpiGhesState {
+ uint64_t ghes_addr_le;
+} AcpiGhesState;
+
+void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker);
+void acpi_build_hest(GArray *table_data, BIOSLinker *linker);
+void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
+ GArray *hardware_errors);
+int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr);
+#endif
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 6d67ace76e..31878ddc72 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -132,6 +132,7 @@ typedef struct {
bool highmem_ecam;
bool its;
bool virt;
+ bool ras;
OnOffAuto acpi;
VirtGICType gic_version;
VirtIOMMUType iommu;
diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h
index 129c45f2c5..9925febfa5 100644
--- a/include/qemu/uuid.h
+++ b/include/qemu/uuid.h
@@ -34,6 +34,33 @@ typedef struct {
};
} QemuUUID;
+/**
+ * UUID_LE - converts the fields of UUID to little-endian array,
+ * each of parameters is the filed of UUID.
+ *
+ * @time_low: The low field of the timestamp
+ * @time_mid: The middle field of the timestamp
+ * @time_hi_and_version: The high field of the timestamp
+ * multiplexed with the version number
+ * @clock_seq_hi_and_reserved: The high field of the clock
+ * sequence multiplexed with the variant
+ * @clock_seq_low: The low field of the clock sequence
+ * @node0: The spatially unique node0 identifier
+ * @node1: The spatially unique node1 identifier
+ * @node2: The spatially unique node2 identifier
+ * @node3: The spatially unique node3 identifier
+ * @node4: The spatially unique node4 identifier
+ * @node5: The spatially unique node5 identifier
+ */
+#define UUID_LE(time_low, time_mid, time_hi_and_version, \
+ clock_seq_hi_and_reserved, clock_seq_low, node0, node1, node2, \
+ node3, node4, node5) \
+ { (time_low) & 0xff, ((time_low) >> 8) & 0xff, ((time_low) >> 16) & 0xff, \
+ ((time_low) >> 24) & 0xff, (time_mid) & 0xff, ((time_mid) >> 8) & 0xff, \
+ (time_hi_and_version) & 0xff, ((time_hi_and_version) >> 8) & 0xff, \
+ (clock_seq_hi_and_reserved), (clock_seq_low), (node0), (node1), (node2),\
+ (node3), (node4), (node5) }
+
#define UUID_FMT "%02hhx%02hhx%02hhx%02hhx-" \
"%02hhx%02hhx-%02hhx%02hhx-" \
"%02hhx%02hhx-" \
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 141342de98..3b2250471c 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -379,8 +379,7 @@ bool kvm_vcpu_id_is_valid(int vcpu_id);
/* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
unsigned long kvm_arch_vcpu_id(CPUState *cpu);
-#ifdef TARGET_I386
-#define KVM_HAVE_MCE_INJECTION 1
+#ifdef KVM_HAVE_MCE_INJECTION
void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
#endif
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index ac2d1f8b56..c660a70c51 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -42,4 +42,16 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
AddressSpace *as, int as_id);
void kvm_set_max_memslot_size(hwaddr max_slot_size);
+
+/**
+ * kvm_hwpoison_page_add:
+ *
+ * Parameters:
+ * @ram_addr: the address in the RAM for the poisoned page
+ *
+ * Add a poisoned page to the list
+ *
+ * Return: None.
+ */
+void kvm_hwpoison_page_add(ram_addr_t ram_addr);
#endif
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 676f216b67..5d995368d4 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -28,6 +28,10 @@
/* ARM processors have a weak memory model */
#define TCG_GUEST_DEFAULT_MO (0)
+#ifdef TARGET_AARCH64
+#define KVM_HAVE_MCE_INJECTION 1
+#endif
+
#define EXCP_UDEF 1 /* undefined instruction */
#define EXCP_SWI 2 /* software interrupt */
#define EXCP_PREFETCH_ABORT 3
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index 591baef535..00b0e08f33 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -605,6 +605,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
#endif
cc->cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt;
+ cc->gdb_core_xml_file = "arm-m-profile.xml";
}
static const ARMCPUInfo arm_tcg_cpus[] = {
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index 063551df23..ecfa88f8e6 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -57,8 +57,12 @@ int arm_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
}
return gdb_get_reg32(mem_buf, 0);
case 25:
- /* CPSR */
- return gdb_get_reg32(mem_buf, cpsr_read(env));
+ /* CPSR, or XPSR for M-profile */
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ return gdb_get_reg32(mem_buf, xpsr_read(env));
+ } else {
+ return gdb_get_reg32(mem_buf, cpsr_read(env));
+ }
}
/* Unknown register. */
return 0;
@@ -98,8 +102,18 @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
}
return 4;
case 25:
- /* CPSR */
- cpsr_write(env, tmp, 0xffffffff, CPSRWriteByGDBStub);
+ /* CPSR, or XPSR for M-profile */
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ /*
+ * Don't allow writing to XPSR.Exception as it can cause
+ * a transition into or out of handler mode (it's not
+ * writeable via the MSR insn so this is a reasonable
+ * restriction). Other fields are safe to update.
+ */
+ xpsr_write(env, tmp, ~XPSR_EXCP);
+ } else {
+ cpsr_write(env, tmp, 0xffffffff, CPSRWriteByGDBStub);
+ }
return 4;
}
/* Unknown register. */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index b88d27819d..a92ae55672 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -3465,7 +3465,7 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
* Report exception with ESR indicating a fault due to a
* translation table walk for a cache maintenance instruction.
*/
- syn = syn_data_abort_no_iss(current_el == target_el,
+ syn = syn_data_abort_no_iss(current_el == target_el, 0,
fi.ea, 1, fi.s1ptw, 1, fsc);
env->exception.vaddress = value;
env->exception.fsr = fsr;
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 5817626b20..49336dc432 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -203,16 +203,16 @@ DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32)
DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
-DEF_HELPER_3(recps_f32, f32, f32, f32, env)
-DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
+DEF_HELPER_3(recps_f32, f32, env, f32, f32)
+DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32)
DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
-DEF_HELPER_2(recpe_u32, i32, i32, ptr)
-DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr)
+DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32)
+DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32)
DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i32, i32, i32, ptr, i32)
DEF_HELPER_3(shl_cc, i32, env, i32, i32)
@@ -284,13 +284,6 @@ DEF_HELPER_2(neon_pmax_s8, i32, i32, i32)
DEF_HELPER_2(neon_pmax_u16, i32, i32, i32)
DEF_HELPER_2(neon_pmax_s16, i32, i32, i32)
-DEF_HELPER_2(neon_abd_u8, i32, i32, i32)
-DEF_HELPER_2(neon_abd_s8, i32, i32, i32)
-DEF_HELPER_2(neon_abd_u16, i32, i32, i32)
-DEF_HELPER_2(neon_abd_s16, i32, i32, i32)
-DEF_HELPER_2(neon_abd_u32, i32, i32, i32)
-DEF_HELPER_2(neon_abd_s32, i32, i32, i32)
-
DEF_HELPER_2(neon_shl_u16, i32, i32, i32)
DEF_HELPER_2(neon_shl_s16, i32, i32, i32)
DEF_HELPER_2(neon_rshl_u8, i32, i32, i32)
@@ -403,7 +396,6 @@ DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32)
DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32)
DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64)
-DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr)
DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr)
DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr)
DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr)
@@ -602,6 +594,8 @@ DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
@@ -691,6 +685,66 @@ DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
diff --git a/target/arm/internals.h b/target/arm/internals.h
index a833e3941d..4bdbc3a8ac 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -451,13 +451,14 @@ static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc)
| ARM_EL_IL | (ea << 9) | (s1ptw << 7) | fsc;
}
-static inline uint32_t syn_data_abort_no_iss(int same_el,
+static inline uint32_t syn_data_abort_no_iss(int same_el, int fnv,
int ea, int cm, int s1ptw,
int wnr, int fsc)
{
return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
| ARM_EL_IL
- | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc;
+ | (fnv << 10) | (ea << 9) | (cm << 8) | (s1ptw << 7)
+ | (wnr << 6) | fsc;
}
static inline uint32_t syn_data_abort_with_iss(int same_el,
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index cd8ab6b8ae..f09ed9f4df 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -28,6 +28,9 @@
#include "sysemu/kvm_int.h"
#include "kvm_arm.h"
#include "internals.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/ghes.h"
+#include "hw/arm/virt.h"
static bool have_guest_debug;
@@ -883,6 +886,30 @@ int kvm_arm_cpreg_level(uint64_t regidx)
return KVM_PUT_RUNTIME_STATE;
}
+/* Callers must hold the iothread mutex lock */
+static void kvm_inject_arm_sea(CPUState *c)
+{
+ ARMCPU *cpu = ARM_CPU(c);
+ CPUARMState *env = &cpu->env;
+ CPUClass *cc = CPU_GET_CLASS(c);
+ uint32_t esr;
+ bool same_el;
+
+ c->exception_index = EXCP_DATA_ABORT;
+ env->exception.target_el = 1;
+
+ /*
+ * Set the DFSC to synchronous external abort and set FnV to not valid,
+ * this will tell guest the FAR_ELx is UNKNOWN for this abort.
+ */
+ same_el = arm_current_el(env) == env->exception.target_el;
+ esr = syn_data_abort_no_iss(same_el, 1, 0, 0, 0, 0, 0x10);
+
+ env->exception.syndrome = esr;
+
+ cc->do_interrupt(c);
+}
+
#define AARCH64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
@@ -1316,6 +1343,56 @@ int kvm_arch_get_registers(CPUState *cs)
return ret;
}
+void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
+{
+ ram_addr_t ram_addr;
+ hwaddr paddr;
+ Object *obj = qdev_get_machine();
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+ bool acpi_enabled = virt_is_acpi_enabled(vms);
+
+ assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
+
+ if (acpi_enabled && addr &&
+ object_property_get_bool(obj, "ras", NULL)) {
+ ram_addr = qemu_ram_addr_from_host(addr);
+ if (ram_addr != RAM_ADDR_INVALID &&
+ kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
+ kvm_hwpoison_page_add(ram_addr);
+ /*
+ * If this is a BUS_MCEERR_AR, we know we have been called
+ * synchronously from the vCPU thread, so we can easily
+ * synchronize the state and inject an error.
+ *
+ * TODO: we currently don't tell the guest at all about
+ * BUS_MCEERR_AO. In that case we might either be being
+ * called synchronously from the vCPU thread, or a bit
+ * later from the main thread, so doing the injection of
+ * the error would be more complicated.
+ */
+ if (code == BUS_MCEERR_AR) {
+ kvm_cpu_synchronize_state(c);
+ if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
+ kvm_inject_arm_sea(c);
+ } else {
+ error_report("failed to record the error");
+ abort();
+ }
+ }
+ return;
+ }
+ if (code == BUS_MCEERR_AO) {
+ error_report("Hardware memory error at addr %p for memory used by "
+ "QEMU itself instead of guest system!", addr);
+ }
+ }
+
+ if (code == BUS_MCEERR_AR) {
+ error_report("Hardware memory error!");
+ exit(1);
+ }
+}
+
/* C6.6.29 BRK instruction */
static const uint32_t brk_insn = 0xd4200000;
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index ec3a92fe75..8beb1db768 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -42,9 +42,23 @@
@3same .... ... . . . size:2 .... .... .... . q:1 . . .... \
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
+@3same_q0 .... ... . . . size:2 .... .... .... . 0 . . .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
+
+# For FP insns the high bit of 'size' is used as part of opcode decode
+@3same_fp .... ... . . . . size:1 .... .... .... . q:1 . . .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
+@3same_fp_q0 .... ... . . . . size:1 .... .... .... . 0 . . .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
+
+VHADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
+VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
VQADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 1 .... @3same
+VRHADD_S_3s 1111 001 0 0 . .. .... .... 0001 . . . 0 .... @3same
+VRHADD_U_3s 1111 001 1 0 . .. .... .... 0001 . . . 0 .... @3same
+
@3same_logic .... ... . . . .. .... .... .... . q:1 .. .... \
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0
@@ -57,6 +71,9 @@ VBSL_3s 1111 001 1 0 . 01 .... .... 0001 ... 1 .... @3same_logic
VBIT_3s 1111 001 1 0 . 10 .... .... 0001 ... 1 .... @3same_logic
VBIF_3s 1111 001 1 0 . 11 .... .... 0001 ... 1 .... @3same_logic
+VHSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 0 .... @3same
+VHSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 0 .... @3same
+
VQSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 1 .... @3same
VQSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 1 .... @3same
@@ -65,14 +82,64 @@ VCGT_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 0 .... @3same
VCGE_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 1 .... @3same
VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same
-VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same
-VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same
+# The _rev suffix indicates that Vn and Vm are reversed. This is
+# the case for shifts. In the Arm ARM these insns are documented
+# with the Vm and Vn fields in their usual places, but in the
+# assembly the operands are listed "backwards", ie in the order
+# Dd, Dm, Dn where other insns use Dd, Dn, Dm. For QEMU we choose
+# to consider Vm and Vn as being in different fields in the insn,
+# which allows us to avoid special-casing shifts in the trans_
+# function code. We would otherwise need to manually swap the operands
+# over to call Neon helper functions that are shared with AArch64,
+# which does not have this odd reversed-operand situation.
+@3same_rev .... ... . . . size:2 .... .... .... . q:1 . . .... \
+ &3same vn=%vm_dp vm=%vn_dp vd=%vd_dp
+
+VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same_rev
+VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev
+
+# Insns operating on 64-bit elements (size!=0b11 handled elsewhere)
+# The _rev suffix indicates that Vn and Vm are reversed (as explained
+# by the comment for the @3same_rev format).
+@3same_64_rev .... ... . . . 11 .... .... .... . q:1 . . .... \
+ &3same vm=%vn_dp vn=%vm_dp vd=%vd_dp size=3
+
+{
+ VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
+ VQSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_rev
+}
+{
+ VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
+ VQSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_rev
+}
+{
+ VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
+ VRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_rev
+}
+{
+ VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
+ VRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_rev
+}
+{
+ VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
+ VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_rev
+}
+{
+ VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
+ VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_rev
+}
VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same
VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
VMIN_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 1 .... @3same
VMIN_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 1 .... @3same
+VABD_S_3s 1111 001 0 0 . .. .... .... 0111 . . . 0 .... @3same
+VABD_U_3s 1111 001 1 0 . .. .... .... 0111 . . . 0 .... @3same
+
+VABA_S_3s 1111 001 0 0 . .. .... .... 0111 . . . 1 .... @3same
+VABA_U_3s 1111 001 1 0 . .. .... .... 0111 . . . 1 .... @3same
+
VADD_3s 1111 001 0 0 . .. .... .... 1000 . . . 0 .... @3same
VSUB_3s 1111 001 1 0 . .. .... .... 1000 . . . 0 .... @3same
@@ -84,3 +151,51 @@ VMLS_3s 1111 001 1 0 . .. .... .... 1001 . . . 0 .... @3same
VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
+
+VPMAX_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 0 .... @3same_q0
+VPMAX_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 0 .... @3same_q0
+
+VPMIN_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 1 .... @3same_q0
+VPMIN_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 1 .... @3same_q0
+
+VQDMULH_3s 1111 001 0 0 . .. .... .... 1011 . . . 0 .... @3same
+VQRDMULH_3s 1111 001 1 0 . .. .... .... 1011 . . . 0 .... @3same
+
+VPADD_3s 1111 001 0 0 . .. .... .... 1011 . . . 1 .... @3same_q0
+
+VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
+
+SHA1_3s 1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+SHA256H_3s 1111 001 1 0 . 00 .... .... 1100 . 1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VFMA_fp_3s 1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp
+VFMS_fp_3s 1111 001 0 0 . 1 . .... .... 1100 ... 1 .... @3same_fp
+
+VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
+
+VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
+VSUB_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
+VPADD_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 0 .... @3same_fp_q0
+VABD_fp_3s 1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
+VMLA_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
+VMLS_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 1 .... @3same_fp
+VMUL_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
+VCEQ_fp_3s 1111 001 0 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
+VCGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
+VACGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 1 .... @3same_fp
+VCGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 0 .... @3same_fp
+VACGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 1 .... @3same_fp
+VMAX_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 0 .... @3same_fp
+VMIN_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 0 .... @3same_fp
+VPMAX_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
+VPMIN_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
+VRECPS_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 1 .... @3same_fp
+VRSQRTS_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
+VMAXNM_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 1 .... @3same_fp
+VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index 448be93fa1..b637265691 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -576,16 +576,6 @@ NEON_POP(pmax_s16, neon_s16, 2)
NEON_POP(pmax_u16, neon_u16, 2)
#undef NEON_FN
-#define NEON_FN(dest, src1, src2) \
- dest = (src1 > src2) ? (src1 - src2) : (src2 - src1)
-NEON_VOP(abd_s8, neon_s8, 4)
-NEON_VOP(abd_u8, neon_u8, 4)
-NEON_VOP(abd_s16, neon_s16, 2)
-NEON_VOP(abd_u16, neon_u16, 2)
-NEON_VOP(abd_s32, neon_s32, 1)
-NEON_VOP(abd_u32, neon_u32, 1)
-#undef NEON_FN
-
#define NEON_FN(dest, src1, src2) do { \
int8_t tmp; \
tmp = (int8_t)src2; \
@@ -1835,13 +1825,6 @@ uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x)
}
/* NEON Float helpers. */
-uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
-{
- float_status *fpst = fpstp;
- float32 f0 = make_float32(a);
- float32 f1 = make_float32(b);
- return float32_val(float32_abs(float32_sub(f0, f1, fpst)));
-}
/* Floating point comparisons produce an integer result.
* Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
index e63f8bda29..7388494a55 100644
--- a/target/arm/tlb_helper.c
+++ b/target/arm/tlb_helper.c
@@ -33,7 +33,7 @@ static inline uint32_t merge_syn_data_abort(uint32_t template_syn,
* ISV field.
*/
if (!(template_syn & ARM_EL_ISV) || target_el != 2 || s1ptw) {
- syn = syn_data_abort_no_iss(same_el,
+ syn = syn_data_abort_no_iss(same_el, 0,
ea, 0, s1ptw, is_write, fsc);
} else {
/*
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 62e5729904..991e451644 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -577,33 +577,6 @@ static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
is_q ? 16 : 8, vec_full_reg_size(s));
}
-/* Expand a 2-operand AdvSIMD vector operation using an op descriptor. */
-static void gen_gvec_op2(DisasContext *s, bool is_q, int rd,
- int rn, const GVecGen2 *gvec_op)
-{
- tcg_gen_gvec_2(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
- is_q ? 16 : 8, vec_full_reg_size(s), gvec_op);
-}
-
-/* Expand a 2-operand + immediate AdvSIMD vector operation using
- * an op descriptor.
- */
-static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
- int rn, int64_t imm, const GVecGen2i *gvec_op)
-{
- tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
- is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
-}
-
-/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
-static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
- int rn, int rm, const GVecGen3 *gvec_op)
-{
- tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), is_q ? 16 : 8,
- vec_full_reg_size(s), gvec_op);
-}
-
/* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
int rn, int rm, int data, gen_helper_gvec_3 *fn)
@@ -614,18 +587,6 @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
-/* Expand a 3-operand + env pointer operation using
- * an out-of-line helper.
- */
-static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
- int rn, int rm, gen_helper_gvec_3_ptr *fn)
-{
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), cpu_env,
- is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
-}
-
/* Expand a 3-operand + fpstatus pointer + simd data value operation using
* an out-of-line helper.
*/
@@ -9726,7 +9687,7 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
switch (opcode) {
case 0x3c: /* URECPE */
- gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
+ gen_helper_recpe_u32(tcg_res, tcg_op);
break;
case 0x3d: /* FRECPE */
gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
@@ -10165,16 +10126,7 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
int size = 32 - clz32(immh) - 1;
int immhb = immh << 3 | immb;
int shift = 2 * (8 << size) - immhb;
- bool accumulate = false;
- int dsize = is_q ? 128 : 64;
- int esize = 8 << size;
- int elements = dsize/esize;
- MemOp memop = size | (is_u ? 0 : MO_SIGN);
- TCGv_i64 tcg_rn = new_tmp_a64(s);
- TCGv_i64 tcg_rd = new_tmp_a64(s);
- TCGv_i64 tcg_round;
- uint64_t round_const;
- int i;
+ GVecGen2iFn *gvec_fn;
if (extract32(immh, 3, 1) && !is_q) {
unallocated_encoding(s);
@@ -10188,27 +10140,12 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
switch (opcode) {
case 0x02: /* SSRA / USRA (accumulate) */
- if (is_u) {
- /* Shift count same as element size produces zero to add. */
- if (shift == 8 << size) {
- goto done;
- }
- gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
- } else {
- /* Shift count same as element size produces all sign to add. */
- if (shift == 8 << size) {
- shift -= 1;
- }
- gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
- }
- return;
+ gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
+ break;
+
case 0x08: /* SRI */
- /* Shift count same as element size is valid but does nothing. */
- if (shift == 8 << size) {
- goto done;
- }
- gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
- return;
+ gvec_fn = gen_gvec_sri;
+ break;
case 0x00: /* SSHR / USHR */
if (is_u) {
@@ -10216,45 +10153,31 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
/* Shift count the same size as element size produces zero. */
tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
is_q ? 16 : 8, vec_full_reg_size(s), 0);
- } else {
- gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
+ return;
}
+ gvec_fn = tcg_gen_gvec_shri;
} else {
/* Shift count the same size as element size produces all sign. */
if (shift == 8 << size) {
shift -= 1;
}
- gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
+ gvec_fn = tcg_gen_gvec_sari;
}
- return;
+ break;
case 0x04: /* SRSHR / URSHR (rounding) */
+ gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
break;
+
case 0x06: /* SRSRA / URSRA (accum + rounding) */
- accumulate = true;
+ gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
break;
+
default:
g_assert_not_reached();
}
- round_const = 1ULL << (shift - 1);
- tcg_round = tcg_const_i64(round_const);
-
- for (i = 0; i < elements; i++) {
- read_vec_element(s, tcg_rn, rn, i, memop);
- if (accumulate) {
- read_vec_element(s, tcg_rd, rd, i, memop);
- }
-
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
- accumulate, is_u, size, shift);
-
- write_vec_element(s, tcg_rd, rd, i, size);
- }
- tcg_temp_free_i64(tcg_round);
-
- done:
- clear_vec_high(s, is_q, rd);
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
}
/* SHL/SLI - Vector shift left */
@@ -10278,7 +10201,7 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
}
if (insert) {
- gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]);
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
} else {
gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
}
@@ -11233,24 +11156,25 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
switch (opcode) {
case 0x01: /* SQADD, UQADD */
- tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
- offsetof(CPUARMState, vfp.qc),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm),
- is_q ? 16 : 8, vec_full_reg_size(s),
- (u ? uqadd_op : sqadd_op) + size);
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
+ }
return;
case 0x05: /* SQSUB, UQSUB */
- tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
- offsetof(CPUARMState, vfp.qc),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm),
- is_q ? 16 : 8, vec_full_reg_size(s),
- (u ? uqsub_op : sqsub_op) + size);
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
+ }
return;
case 0x08: /* SSHL, USHL */
- gen_gvec_op3(s, is_q, rd, rn, rm,
- u ? &ushl_op[size] : &sshl_op[size]);
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
+ }
return;
case 0x0c: /* SMAX, UMAX */
if (u) {
@@ -11266,6 +11190,20 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
}
return;
+ case 0xe: /* SABD, UABD */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
+ }
+ return;
+ case 0xf: /* SABA, UABA */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
+ }
+ return;
case 0x10: /* ADD, SUB */
if (u) {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
@@ -11282,14 +11220,14 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
return;
case 0x12: /* MLA, MLS */
if (u) {
- gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
} else {
- gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
}
return;
case 0x11:
if (!u) { /* CMTST */
- gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
return;
}
/* else CMEQ */
@@ -11398,17 +11336,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genenvfn = fns[size][u];
break;
}
- case 0xe: /* SABD, UABD */
- case 0xf: /* SABA, UABA */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
- { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
- { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
- };
- genfn = fns[size][u];
- break;
- }
case 0x16: /* SQDMULH, SQRDMULH */
{
static NeonGenTwoOpEnvFn * const fns[2][2] = {
@@ -11757,29 +11684,11 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
switch (opcode) {
case 0x0: /* SQRDMLAH (vector) */
- switch (size) {
- case 1:
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
- break;
- case 2:
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
- break;
- default:
- g_assert_not_reached();
- }
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
return;
case 0x1: /* SQRDMLSH (vector) */
- switch (size) {
- case 1:
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
- break;
- case 2:
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
- break;
- default:
- g_assert_not_reached();
- }
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
return;
case 0x2: /* SDOT / UDOT */
@@ -12308,7 +12217,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
- need_fpstatus = true;
break;
case 0x1e: /* FRINT32Z */
case 0x1f: /* FRINT64Z */
@@ -12358,13 +12266,21 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
break;
case 0x8: /* CMGT, CMGE */
- gen_gvec_op2(s, is_q, rd, rn, u ? &cge0_op[size] : &cgt0_op[size]);
+ if (u) {
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
+ } else {
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
+ }
return;
case 0x9: /* CMEQ, CMLE */
- gen_gvec_op2(s, is_q, rd, rn, u ? &cle0_op[size] : &ceq0_op[size]);
+ if (u) {
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
+ } else {
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
+ }
return;
case 0xa: /* CMLT */
- gen_gvec_op2(s, is_q, rd, rn, &clt0_op[size]);
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
return;
case 0xb:
if (u) { /* ABS, NEG */
@@ -12468,7 +12384,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
break;
case 0x7c: /* URSQRTE */
- gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
+ gen_helper_rsqrte_u32(tcg_res, tcg_op);
break;
case 0x1e: /* FRINT32Z */
case 0x5e: /* FRINT32X */
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 50b77b6d71..3fe65a0b08 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -603,6 +603,12 @@ DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
+DO_3SAME(VSHL_S, gen_gvec_sshl)
+DO_3SAME(VSHL_U, gen_gvec_ushl)
+DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
+DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
+DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
+DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
/* These insns are all gvec_bitsel but with the inputs in various orders. */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
@@ -632,6 +638,13 @@ DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
+DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
+DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
+DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
+DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
+DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
+DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
+DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
#define DO_3SAME_CMP(INSN, COND) \
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
@@ -648,67 +661,652 @@ DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
-static void gen_VTST_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
+static void gen_VMUL_p_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz,
+ 0, gen_helper_gvec_pmul_b);
+}
+
+static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
+{
+ if (a->size != 0) {
+ return false;
+ }
+ return do_3same(s, a, gen_VMUL_p_3s);
+}
+
+#define DO_VQRDMLAH(INSN, FUNC) \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (!dc_isar_feature(aa32_rdm, s)) { \
+ return false; \
+ } \
+ if (a->size != 1 && a->size != 2) { \
+ return false; \
+ } \
+ return do_3same(s, a, FUNC); \
+ }
+
+DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
+DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
+
+static bool trans_SHA1_3s(DisasContext *s, arg_SHA1_3s *a)
+{
+ TCGv_ptr ptr1, ptr2, ptr3;
+ TCGv_i32 tmp;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+ !dc_isar_feature(aa32_sha1, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ ptr1 = vfp_reg_ptr(true, a->vd);
+ ptr2 = vfp_reg_ptr(true, a->vn);
+ ptr3 = vfp_reg_ptr(true, a->vm);
+ tmp = tcg_const_i32(a->optype);
+ gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp);
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_ptr(ptr1);
+ tcg_temp_free_ptr(ptr2);
+ tcg_temp_free_ptr(ptr3);
+
+ return true;
+}
+
+static bool trans_SHA256H_3s(DisasContext *s, arg_SHA256H_3s *a)
+{
+ TCGv_ptr ptr1, ptr2, ptr3;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+ !dc_isar_feature(aa32_sha2, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ ptr1 = vfp_reg_ptr(true, a->vd);
+ ptr2 = vfp_reg_ptr(true, a->vn);
+ ptr3 = vfp_reg_ptr(true, a->vm);
+ gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
+ tcg_temp_free_ptr(ptr1);
+ tcg_temp_free_ptr(ptr2);
+ tcg_temp_free_ptr(ptr3);
+
+ return true;
+}
+
+static bool trans_SHA256H2_3s(DisasContext *s, arg_SHA256H2_3s *a)
+{
+ TCGv_ptr ptr1, ptr2, ptr3;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+ !dc_isar_feature(aa32_sha2, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ ptr1 = vfp_reg_ptr(true, a->vd);
+ ptr2 = vfp_reg_ptr(true, a->vn);
+ ptr3 = vfp_reg_ptr(true, a->vm);
+ gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
+ tcg_temp_free_ptr(ptr1);
+ tcg_temp_free_ptr(ptr2);
+ tcg_temp_free_ptr(ptr3);
+
+ return true;
+}
+
+static bool trans_SHA256SU1_3s(DisasContext *s, arg_SHA256SU1_3s *a)
{
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &cmtst_op[vece]);
+ TCGv_ptr ptr1, ptr2, ptr3;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+ !dc_isar_feature(aa32_sha2, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ ptr1 = vfp_reg_ptr(true, a->vd);
+ ptr2 = vfp_reg_ptr(true, a->vn);
+ ptr3 = vfp_reg_ptr(true, a->vm);
+ gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
+ tcg_temp_free_ptr(ptr1);
+ tcg_temp_free_ptr(ptr2);
+ tcg_temp_free_ptr(ptr3);
+
+ return true;
}
-DO_3SAME_NO_SZ_3(VTST, gen_VTST_3s)
-#define DO_3SAME_GVEC4(INSN, OPARRAY) \
+#define DO_3SAME_64(INSN, FUNC) \
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
uint32_t rn_ofs, uint32_t rm_ofs, \
uint32_t oprsz, uint32_t maxsz) \
{ \
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), \
- rn_ofs, rm_ofs, oprsz, maxsz, &OPARRAY[vece]); \
+ static const GVecGen3 op = { .fni8 = FUNC }; \
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
} \
DO_3SAME(INSN, gen_##INSN##_3s)
-DO_3SAME_GVEC4(VQADD_S, sqadd_op)
-DO_3SAME_GVEC4(VQADD_U, uqadd_op)
-DO_3SAME_GVEC4(VQSUB_S, sqsub_op)
-DO_3SAME_GVEC4(VQSUB_U, uqsub_op)
+#define DO_3SAME_64_ENV(INSN, FUNC) \
+ static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
+ { \
+ FUNC(d, cpu_env, n, m); \
+ } \
+ DO_3SAME_64(INSN, gen_##INSN##_elt)
-static void gen_VMUL_p_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
- uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz,
- 0, gen_helper_gvec_pmul_b);
-}
+DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
+DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
+DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
+DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
+DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
+DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
-static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
+#define DO_3SAME_32(INSN, FUNC) \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ static const GVecGen3 ops[4] = { \
+ { .fni4 = gen_helper_neon_##FUNC##8 }, \
+ { .fni4 = gen_helper_neon_##FUNC##16 }, \
+ { .fni4 = gen_helper_neon_##FUNC##32 }, \
+ { 0 }, \
+ }; \
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
+ } \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size > 2) { \
+ return false; \
+ } \
+ return do_3same(s, a, gen_##INSN##_3s); \
+ }
+
+/*
+ * Some helper functions need to be passed the cpu_env. In order
+ * to use those with the gvec APIs like tcg_gen_gvec_3() we need
+ * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
+ * and which call a NeonGenTwoOpEnvFn().
+ */
+#define WRAP_ENV_FN(WRAPNAME, FUNC) \
+ static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \
+ { \
+ FUNC(d, cpu_env, n, m); \
+ }
+
+#define DO_3SAME_32_ENV(INSN, FUNC) \
+ WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \
+ WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \
+ WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ static const GVecGen3 ops[4] = { \
+ { .fni4 = gen_##INSN##_tramp8 }, \
+ { .fni4 = gen_##INSN##_tramp16 }, \
+ { .fni4 = gen_##INSN##_tramp32 }, \
+ { 0 }, \
+ }; \
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
+ } \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size > 2) { \
+ return false; \
+ } \
+ return do_3same(s, a, gen_##INSN##_3s); \
+ }
+
+DO_3SAME_32(VHADD_S, hadd_s)
+DO_3SAME_32(VHADD_U, hadd_u)
+DO_3SAME_32(VHSUB_S, hsub_s)
+DO_3SAME_32(VHSUB_U, hsub_u)
+DO_3SAME_32(VRHADD_S, rhadd_s)
+DO_3SAME_32(VRHADD_U, rhadd_u)
+DO_3SAME_32(VRSHL_S, rshl_s)
+DO_3SAME_32(VRSHL_U, rshl_u)
+
+DO_3SAME_32_ENV(VQSHL_S, qshl_s)
+DO_3SAME_32_ENV(VQSHL_U, qshl_u)
+DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
+DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
+
+static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
{
- if (a->size != 0) {
+ /* Operations handled pairwise 32 bits at a time */
+ TCGv_i32 tmp, tmp2, tmp3;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
}
- return do_3same(s, a, gen_VMUL_p_3s);
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->size == 3) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ assert(a->q == 0); /* enforced by decode patterns */
+
+ /*
+ * Note that we have to be careful not to clobber the source operands
+ * in the "vm == vd" case by storing the result of the first pass too
+ * early. Since Q is 0 there are always just two passes, so instead
+ * of a complicated loop over each pass we just unroll.
+ */
+ tmp = neon_load_reg(a->vn, 0);
+ tmp2 = neon_load_reg(a->vn, 1);
+ fn(tmp, tmp, tmp2);
+ tcg_temp_free_i32(tmp2);
+
+ tmp3 = neon_load_reg(a->vm, 0);
+ tmp2 = neon_load_reg(a->vm, 1);
+ fn(tmp3, tmp3, tmp2);
+ tcg_temp_free_i32(tmp2);
+
+ neon_store_reg(a->vd, 0, tmp);
+ neon_store_reg(a->vd, 1, tmp3);
+ return true;
}
-#define DO_3SAME_GVEC3_NO_SZ_3(INSN, OPARRAY) \
+#define DO_3SAME_PAIR(INSN, func) \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ static NeonGenTwoOpFn * const fns[] = { \
+ gen_helper_neon_##func##8, \
+ gen_helper_neon_##func##16, \
+ gen_helper_neon_##func##32, \
+ }; \
+ if (a->size > 2) { \
+ return false; \
+ } \
+ return do_3same_pair(s, a, fns[a->size]); \
+ }
+
+/* 32-bit pairwise ops end up the same as the elementwise versions. */
+#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
+#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
+#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
+#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
+#define gen_helper_neon_padd_u32 tcg_gen_add_i32
+
+DO_3SAME_PAIR(VPMAX_S, pmax_s)
+DO_3SAME_PAIR(VPMIN_S, pmin_s)
+DO_3SAME_PAIR(VPMAX_U, pmax_u)
+DO_3SAME_PAIR(VPMIN_U, pmin_u)
+DO_3SAME_PAIR(VPADD, padd_u)
+
+#define DO_3SAME_VQDMULH(INSN, FUNC) \
+ WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
+ WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
uint32_t rn_ofs, uint32_t rm_ofs, \
uint32_t oprsz, uint32_t maxsz) \
{ \
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, \
- oprsz, maxsz, &OPARRAY[vece]); \
+ static const GVecGen3 ops[2] = { \
+ { .fni4 = gen_##INSN##_tramp16 }, \
+ { .fni4 = gen_##INSN##_tramp32 }, \
+ }; \
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
} \
- DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size != 1 && a->size != 2) { \
+ return false; \
+ } \
+ return do_3same(s, a, gen_##INSN##_3s); \
+ }
+DO_3SAME_VQDMULH(VQDMULH, qdmulh)
+DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
-DO_3SAME_GVEC3_NO_SZ_3(VMLA, mla_op)
-DO_3SAME_GVEC3_NO_SZ_3(VMLS, mls_op)
+static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn,
+ bool reads_vd)
+{
+ /*
+ * FP operations handled elementwise 32 bits at a time.
+ * If reads_vd is true then the old value of Vd will be
+ * loaded before calling the callback function. This is
+ * used for multiply-accumulate type operations.
+ */
+ TCGv_i32 tmp, tmp2;
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
-#define DO_3SAME_GVEC3_SHIFT(INSN, OPARRAY) \
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+ tmp = neon_load_reg(a->vn, pass);
+ tmp2 = neon_load_reg(a->vm, pass);
+ if (reads_vd) {
+ TCGv_i32 tmp_rd = neon_load_reg(a->vd, pass);
+ fn(tmp_rd, tmp, tmp2, fpstatus);
+ neon_store_reg(a->vd, pass, tmp_rd);
+ tcg_temp_free_i32(tmp);
+ } else {
+ fn(tmp, tmp, tmp2, fpstatus);
+ neon_store_reg(a->vd, pass, tmp);
+ }
+ tcg_temp_free_i32(tmp2);
+ }
+ tcg_temp_free_ptr(fpstatus);
+ return true;
+}
+
+/*
+ * For all the functions using this macro, size == 1 means fp16,
+ * which is an architecture extension we don't implement yet.
+ */
+#define DO_3S_FP_GVEC(INSN,FUNC) \
static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
uint32_t rn_ofs, uint32_t rm_ofs, \
uint32_t oprsz, uint32_t maxsz) \
{ \
- /* Note the operation is vshl vd,vm,vn */ \
- tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, \
- oprsz, maxsz, &OPARRAY[vece]); \
+ TCGv_ptr fpst = get_fpstatus_ptr(1); \
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \
+ oprsz, maxsz, 0, FUNC); \
+ tcg_temp_free_ptr(fpst); \
} \
- DO_3SAME(INSN, gen_##INSN##_3s)
+ static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size != 0) { \
+ /* TODO fp16 support */ \
+ return false; \
+ } \
+ return do_3same(s, a, gen_##INSN##_3s); \
+ }
+
+
+DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s)
+DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s)
+DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s)
+DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s)
+
+/*
+ * For all the functions using this macro, size == 1 means fp16,
+ * which is an architecture extension we don't implement yet.
+ */
+#define DO_3S_FP(INSN,FUNC,READS_VD) \
+ static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size != 0) { \
+ /* TODO fp16 support */ \
+ return false; \
+ } \
+ return do_3same_fp(s, a, FUNC, READS_VD); \
+ }
+
+DO_3S_FP(VCEQ, gen_helper_neon_ceq_f32, false)
+DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false)
+DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false)
+DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false)
+DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false)
+DO_3S_FP(VMAX, gen_helper_vfp_maxs, false)
+DO_3S_FP(VMIN, gen_helper_vfp_mins, false)
+
+static void gen_VMLA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
+ TCGv_ptr fpstatus)
+{
+ gen_helper_vfp_muls(vn, vn, vm, fpstatus);
+ gen_helper_vfp_adds(vd, vd, vn, fpstatus);
+}
+
+static void gen_VMLS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
+ TCGv_ptr fpstatus)
+{
+ gen_helper_vfp_muls(vn, vn, vm, fpstatus);
+ gen_helper_vfp_subs(vd, vd, vn, fpstatus);
+}
+
+DO_3S_FP(VMLA, gen_VMLA_fp_3s, true)
+DO_3S_FP(VMLS, gen_VMLS_fp_3s, true)
+
+static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
+ return false;
+ }
+
+ if (a->size != 0) {
+ /* TODO fp16 support */
+ return false;
+ }
+
+ return do_3same_fp(s, a, gen_helper_vfp_maxnums, false);
+}
+
+static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
+ return false;
+ }
+
+ if (a->size != 0) {
+ /* TODO fp16 support */
+ return false;
+ }
+
+ return do_3same_fp(s, a, gen_helper_vfp_minnums, false);
+}
+
+WRAP_ENV_FN(gen_VRECPS_tramp, gen_helper_recps_f32)
+
+static void gen_VRECPS_fp_3s(unsigned vece, uint32_t rd_ofs,
+ uint32_t rn_ofs, uint32_t rm_ofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 ops = { .fni4 = gen_VRECPS_tramp };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
+}
+
+static bool trans_VRECPS_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (a->size != 0) {
+ /* TODO fp16 support */
+ return false;
+ }
+
+ return do_3same(s, a, gen_VRECPS_fp_3s);
+}
+
+WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32)
+
+static void gen_VRSQRTS_fp_3s(unsigned vece, uint32_t rd_ofs,
+ uint32_t rn_ofs, uint32_t rm_ofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 ops = { .fni4 = gen_VRSQRTS_tramp };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
+}
+
+static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (a->size != 0) {
+ /* TODO fp16 support */
+ return false;
+ }
+
+ return do_3same(s, a, gen_VRSQRTS_fp_3s);
+}
+
+static void gen_VFMA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
+ TCGv_ptr fpstatus)
+{
+ gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
+}
+
+static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (!dc_isar_feature(aa32_simdfmac, s)) {
+ return false;
+ }
+
+ if (a->size != 0) {
+ /* TODO fp16 support */
+ return false;
+ }
+
+ return do_3same_fp(s, a, gen_VFMA_fp_3s, true);
+}
+
+static void gen_VFMS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
+ TCGv_ptr fpstatus)
+{
+ gen_helper_vfp_negs(vn, vn);
+ gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
+}
+
+static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (!dc_isar_feature(aa32_simdfmac, s)) {
+ return false;
+ }
+
+ if (a->size != 0) {
+ /* TODO fp16 support */
+ return false;
+ }
+
+ return do_3same_fp(s, a, gen_VFMS_fp_3s, true);
+}
+
+static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
+{
+ /* FP operations handled pairwise 32 bits at a time */
+ TCGv_i32 tmp, tmp2, tmp3;
+ TCGv_ptr fpstatus;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ assert(a->q == 0); /* enforced by decode patterns */
+
+ /*
+ * Note that we have to be careful not to clobber the source operands
+ * in the "vm == vd" case by storing the result of the first pass too
+ * early. Since Q is 0 there are always just two passes, so instead
+ * of a complicated loop over each pass we just unroll.
+ */
+ fpstatus = get_fpstatus_ptr(1);
+ tmp = neon_load_reg(a->vn, 0);
+ tmp2 = neon_load_reg(a->vn, 1);
+ fn(tmp, tmp, tmp2, fpstatus);
+ tcg_temp_free_i32(tmp2);
+
+ tmp3 = neon_load_reg(a->vm, 0);
+ tmp2 = neon_load_reg(a->vm, 1);
+ fn(tmp3, tmp3, tmp2, fpstatus);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_ptr(fpstatus);
+
+ neon_store_reg(a->vd, 0, tmp);
+ neon_store_reg(a->vd, 1, tmp3);
+ return true;
+}
+
+/*
+ * For all the functions using this macro, size == 1 means fp16,
+ * which is an architecture extension we don't implement yet.
+ */
+#define DO_3S_FP_PAIR(INSN,FUNC) \
+ static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size != 0) { \
+ /* TODO fp16 support */ \
+ return false; \
+ } \
+ return do_3same_fp_pair(s, a, FUNC); \
+ }
-DO_3SAME_GVEC3_SHIFT(VSHL_S, sshl_op)
-DO_3SAME_GVEC3_SHIFT(VSHL_U, ushl_op)
+DO_3S_FP_PAIR(VPADD, gen_helper_vfp_adds)
+DO_3S_FP_PAIR(VPMAX, gen_helper_vfp_maxs)
+DO_3S_FP_PAIR(VPMIN, gen_helper_vfp_mins)
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 74fac1d09c..4c9bb8b5ac 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3011,12 +3011,6 @@ static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
}
}
-/* 32-bit pairwise ops end up the same as the elementwise versions. */
-#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
-#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
-#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
-#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
-
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
switch ((size << 1) | u) { \
case 0: \
@@ -3397,78 +3391,6 @@ static void gen_neon_narrow_op(int op, int u, int size,
}
}
-/* Symbolic constants for op fields for Neon 3-register same-length.
- * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
- * table A7-9.
- */
-#define NEON_3R_VHADD 0
-#define NEON_3R_VQADD 1
-#define NEON_3R_VRHADD 2
-#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
-#define NEON_3R_VHSUB 4
-#define NEON_3R_VQSUB 5
-#define NEON_3R_VCGT 6
-#define NEON_3R_VCGE 7
-#define NEON_3R_VSHL 8
-#define NEON_3R_VQSHL 9
-#define NEON_3R_VRSHL 10
-#define NEON_3R_VQRSHL 11
-#define NEON_3R_VMAX 12
-#define NEON_3R_VMIN 13
-#define NEON_3R_VABD 14
-#define NEON_3R_VABA 15
-#define NEON_3R_VADD_VSUB 16
-#define NEON_3R_VTST_VCEQ 17
-#define NEON_3R_VML 18 /* VMLA, VMLS */
-#define NEON_3R_VMUL 19
-#define NEON_3R_VPMAX 20
-#define NEON_3R_VPMIN 21
-#define NEON_3R_VQDMULH_VQRDMULH 22
-#define NEON_3R_VPADD_VQRDMLAH 23
-#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
-#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
-#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
-#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
-#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
-#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
-#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
-#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
-
-static const uint8_t neon_3r_sizes[] = {
- [NEON_3R_VHADD] = 0x7,
- [NEON_3R_VQADD] = 0xf,
- [NEON_3R_VRHADD] = 0x7,
- [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
- [NEON_3R_VHSUB] = 0x7,
- [NEON_3R_VQSUB] = 0xf,
- [NEON_3R_VCGT] = 0x7,
- [NEON_3R_VCGE] = 0x7,
- [NEON_3R_VSHL] = 0xf,
- [NEON_3R_VQSHL] = 0xf,
- [NEON_3R_VRSHL] = 0xf,
- [NEON_3R_VQRSHL] = 0xf,
- [NEON_3R_VMAX] = 0x7,
- [NEON_3R_VMIN] = 0x7,
- [NEON_3R_VABD] = 0x7,
- [NEON_3R_VABA] = 0x7,
- [NEON_3R_VADD_VSUB] = 0xf,
- [NEON_3R_VTST_VCEQ] = 0x7,
- [NEON_3R_VML] = 0x7,
- [NEON_3R_VMUL] = 0x7,
- [NEON_3R_VPMAX] = 0x7,
- [NEON_3R_VPMIN] = 0x7,
- [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
- [NEON_3R_VPADD_VQRDMLAH] = 0x7,
- [NEON_3R_SHA] = 0xf, /* size field encodes op type */
- [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
- [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
-};
-
/* Symbolic constants for op fields for Neon 2-register miscellaneous.
* The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
* table A7-13.
@@ -3629,220 +3551,91 @@ static const uint8_t neon_2rm_sizes[] = {
[NEON_2RM_VCVT_UF] = 0x4,
};
-
-/* Expand v8.1 simd helper. */
-static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
- int q, int rd, int rn, int rm)
+static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz,
+ gen_helper_gvec_3_ptr *fn)
{
- if (dc_isar_feature(aa32_rdm, s)) {
- int opr_sz = (1 + q) * 8;
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), cpu_env,
- opr_sz, opr_sz, 0, fn);
- return 0;
- }
- return 1;
-}
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
-static void gen_ceq0_i32(TCGv_i32 d, TCGv_i32 a)
-{
- tcg_gen_setcondi_i32(TCG_COND_EQ, d, a, 0);
- tcg_gen_neg_i32(d, d);
+ tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
+ opr_sz, max_sz, 0, fn);
+ tcg_temp_free_ptr(qc_ptr);
}
-static void gen_ceq0_i64(TCGv_i64 d, TCGv_i64 a)
+void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
- tcg_gen_setcondi_i64(TCG_COND_EQ, d, a, 0);
- tcg_gen_neg_i64(d, d);
+ static gen_helper_gvec_3_ptr * const fns[2] = {
+ gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
+ };
+ tcg_debug_assert(vece >= 1 && vece <= 2);
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}
-static void gen_ceq0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
+void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
- tcg_gen_cmp_vec(TCG_COND_EQ, vece, d, a, zero);
- tcg_temp_free_vec(zero);
-}
+ static gen_helper_gvec_3_ptr * const fns[2] = {
+ gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
+ };
+ tcg_debug_assert(vece >= 1 && vece <= 2);
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
+}
+
+#define GEN_CMP0(NAME, COND) \
+ static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
+ { \
+ tcg_gen_setcondi_i32(COND, d, a, 0); \
+ tcg_gen_neg_i32(d, d); \
+ } \
+ static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
+ { \
+ tcg_gen_setcondi_i64(COND, d, a, 0); \
+ tcg_gen_neg_i64(d, d); \
+ } \
+ static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
+ { \
+ TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
+ tcg_gen_cmp_vec(COND, vece, d, a, zero); \
+ tcg_temp_free_vec(zero); \
+ } \
+ void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
+ uint32_t opr_sz, uint32_t max_sz) \
+ { \
+ const GVecGen2 op[4] = { \
+ { .fno = gen_helper_gvec_##NAME##0_b, \
+ .fniv = gen_##NAME##0_vec, \
+ .opt_opc = vecop_list_cmp, \
+ .vece = MO_8 }, \
+ { .fno = gen_helper_gvec_##NAME##0_h, \
+ .fniv = gen_##NAME##0_vec, \
+ .opt_opc = vecop_list_cmp, \
+ .vece = MO_16 }, \
+ { .fni4 = gen_##NAME##0_i32, \
+ .fniv = gen_##NAME##0_vec, \
+ .opt_opc = vecop_list_cmp, \
+ .vece = MO_32 }, \
+ { .fni8 = gen_##NAME##0_i64, \
+ .fniv = gen_##NAME##0_vec, \
+ .opt_opc = vecop_list_cmp, \
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
+ .vece = MO_64 }, \
+ }; \
+ tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
+ }
static const TCGOpcode vecop_list_cmp[] = {
INDEX_op_cmp_vec, 0
};
-const GVecGen2 ceq0_op[4] = {
- { .fno = gen_helper_gvec_ceq0_b,
- .fniv = gen_ceq0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_8 },
- { .fno = gen_helper_gvec_ceq0_h,
- .fniv = gen_ceq0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_16 },
- { .fni4 = gen_ceq0_i32,
- .fniv = gen_ceq0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_32 },
- { .fni8 = gen_ceq0_i64,
- .fniv = gen_ceq0_vec,
- .opt_opc = vecop_list_cmp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .vece = MO_64 },
-};
-
-static void gen_cle0_i32(TCGv_i32 d, TCGv_i32 a)
-{
- tcg_gen_setcondi_i32(TCG_COND_LE, d, a, 0);
- tcg_gen_neg_i32(d, d);
-}
-
-static void gen_cle0_i64(TCGv_i64 d, TCGv_i64 a)
-{
- tcg_gen_setcondi_i64(TCG_COND_LE, d, a, 0);
- tcg_gen_neg_i64(d, d);
-}
-
-static void gen_cle0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
-{
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
- tcg_gen_cmp_vec(TCG_COND_LE, vece, d, a, zero);
- tcg_temp_free_vec(zero);
-}
-
-const GVecGen2 cle0_op[4] = {
- { .fno = gen_helper_gvec_cle0_b,
- .fniv = gen_cle0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_8 },
- { .fno = gen_helper_gvec_cle0_h,
- .fniv = gen_cle0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_16 },
- { .fni4 = gen_cle0_i32,
- .fniv = gen_cle0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_32 },
- { .fni8 = gen_cle0_i64,
- .fniv = gen_cle0_vec,
- .opt_opc = vecop_list_cmp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .vece = MO_64 },
-};
-
-static void gen_cge0_i32(TCGv_i32 d, TCGv_i32 a)
-{
- tcg_gen_setcondi_i32(TCG_COND_GE, d, a, 0);
- tcg_gen_neg_i32(d, d);
-}
-
-static void gen_cge0_i64(TCGv_i64 d, TCGv_i64 a)
-{
- tcg_gen_setcondi_i64(TCG_COND_GE, d, a, 0);
- tcg_gen_neg_i64(d, d);
-}
-
-static void gen_cge0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
-{
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
- tcg_gen_cmp_vec(TCG_COND_GE, vece, d, a, zero);
- tcg_temp_free_vec(zero);
-}
-
-const GVecGen2 cge0_op[4] = {
- { .fno = gen_helper_gvec_cge0_b,
- .fniv = gen_cge0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_8 },
- { .fno = gen_helper_gvec_cge0_h,
- .fniv = gen_cge0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_16 },
- { .fni4 = gen_cge0_i32,
- .fniv = gen_cge0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_32 },
- { .fni8 = gen_cge0_i64,
- .fniv = gen_cge0_vec,
- .opt_opc = vecop_list_cmp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .vece = MO_64 },
-};
-
-static void gen_clt0_i32(TCGv_i32 d, TCGv_i32 a)
-{
- tcg_gen_setcondi_i32(TCG_COND_LT, d, a, 0);
- tcg_gen_neg_i32(d, d);
-}
-
-static void gen_clt0_i64(TCGv_i64 d, TCGv_i64 a)
-{
- tcg_gen_setcondi_i64(TCG_COND_LT, d, a, 0);
- tcg_gen_neg_i64(d, d);
-}
-
-static void gen_clt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
-{
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
- tcg_gen_cmp_vec(TCG_COND_LT, vece, d, a, zero);
- tcg_temp_free_vec(zero);
-}
-
-const GVecGen2 clt0_op[4] = {
- { .fno = gen_helper_gvec_clt0_b,
- .fniv = gen_clt0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_8 },
- { .fno = gen_helper_gvec_clt0_h,
- .fniv = gen_clt0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_16 },
- { .fni4 = gen_clt0_i32,
- .fniv = gen_clt0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_32 },
- { .fni8 = gen_clt0_i64,
- .fniv = gen_clt0_vec,
- .opt_opc = vecop_list_cmp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .vece = MO_64 },
-};
+GEN_CMP0(ceq, TCG_COND_EQ)
+GEN_CMP0(cle, TCG_COND_LE)
+GEN_CMP0(cge, TCG_COND_GE)
+GEN_CMP0(clt, TCG_COND_LT)
+GEN_CMP0(cgt, TCG_COND_GT)
-static void gen_cgt0_i32(TCGv_i32 d, TCGv_i32 a)
-{
- tcg_gen_setcondi_i32(TCG_COND_GT, d, a, 0);
- tcg_gen_neg_i32(d, d);
-}
-
-static void gen_cgt0_i64(TCGv_i64 d, TCGv_i64 a)
-{
- tcg_gen_setcondi_i64(TCG_COND_GT, d, a, 0);
- tcg_gen_neg_i64(d, d);
-}
-
-static void gen_cgt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
-{
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
- tcg_gen_cmp_vec(TCG_COND_GT, vece, d, a, zero);
- tcg_temp_free_vec(zero);
-}
-
-const GVecGen2 cgt0_op[4] = {
- { .fno = gen_helper_gvec_cgt0_b,
- .fniv = gen_cgt0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_8 },
- { .fno = gen_helper_gvec_cgt0_h,
- .fniv = gen_cgt0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_16 },
- { .fni4 = gen_cgt0_i32,
- .fniv = gen_cgt0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_32 },
- { .fni8 = gen_cgt0_i64,
- .fniv = gen_cgt0_vec,
- .opt_opc = vecop_list_cmp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .vece = MO_64 },
-};
+#undef GEN_CMP0
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
@@ -3874,33 +3667,51 @@ static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
tcg_gen_add_vec(vece, d, d, a);
}
-static const TCGOpcode vecop_list_ssra[] = {
- INDEX_op_sari_vec, INDEX_op_add_vec, 0
-};
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_ssra8_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_ssra16_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_ssra32_i32,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_ssra64_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_b,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
-const GVecGen2i ssra_op[4] = {
- { .fni8 = gen_ssra8_i64,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_ssra,
- .vece = MO_8 },
- { .fni8 = gen_ssra16_i64,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_ssra,
- .vece = MO_16 },
- { .fni4 = gen_ssra32_i32,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_ssra,
- .vece = MO_32 },
- { .fni8 = gen_ssra64_i64,
- .fniv = gen_ssra_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list_ssra,
- .load_dest = true,
- .vece = MO_64 },
-};
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits.
+ */
+ shift = MIN(shift, (8 << vece) - 1);
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+}
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
@@ -3932,33 +3743,471 @@ static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
tcg_gen_add_vec(vece, d, d, a);
}
-static const TCGOpcode vecop_list_usra[] = {
- INDEX_op_shri_vec, INDEX_op_add_vec, 0
-};
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_usra8_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8, },
+ { .fni8 = gen_usra16_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16, },
+ { .fni4 = gen_usra32_i32,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32, },
+ { .fni8 = gen_usra64_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64, },
+ };
-const GVecGen2i usra_op[4] = {
- { .fni8 = gen_usra8_i64,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_8, },
- { .fni8 = gen_usra16_i64,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_16, },
- { .fni4 = gen_usra32_i32,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_32, },
- { .fni8 = gen_usra64_i64,
- .fniv = gen_usra_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_64, },
-};
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Unsigned results in all zeros as input to accumulate: nop.
+ */
+ if (shift < (8 << vece)) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ } else {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ }
+}
+
+/*
+ * Shift one less than the requested amount, and the low bit is
+ * the rounding bit. For the 8 and 16-bit operations, because we
+ * mask the low bit, we can perform a normal integer shift instead
+ * of a vector shift.
+ */
+static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
+ tcg_gen_vec_sar8i_i64(d, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
+ tcg_gen_vec_sar16i_i64(d, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
+ tcg_gen_sari_i32(d, a, sh);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
+ tcg_gen_sari_i64(d, a, sh);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
+ tcg_gen_dupi_vec(vece, ones, 1);
+ tcg_gen_and_vec(vece, t, t, ones);
+ tcg_gen_sari_vec(vece, d, a, sh);
+ tcg_gen_add_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(ones);
+}
+
+void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_srshr8_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_srshr16_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_srshr32_i32,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_srshr64_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ if (shift == (8 << vece)) {
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits. With rounding, this produces
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
+ * I.e. always zero.
+ */
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
+static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_srshr8_i64(t, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_srshr16_i64(t, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ gen_srshr32_i32(t, a, sh);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_srshr64_i64(t, a, sh);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ gen_srshr_vec(vece, t, a, sh);
+ tcg_gen_add_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
+
+void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_srsra8_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni8 = gen_srsra16_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_srsra32_i32,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_srsra64_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits. With rounding, this produces
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
+ * I.e. always zero. With accumulation, this leaves D unchanged.
+ */
+ if (shift == (8 << vece)) {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
+static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
+ tcg_gen_vec_shr8i_i64(d, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
+ tcg_gen_vec_shr16i_i64(d, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
+ tcg_gen_shri_i32(d, a, sh);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
+ tcg_gen_shri_i64(d, a, sh);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shri_vec(vece, t, a, shift - 1);
+ tcg_gen_dupi_vec(vece, ones, 1);
+ tcg_gen_and_vec(vece, t, t, ones);
+ tcg_gen_shri_vec(vece, d, a, shift);
+ tcg_gen_add_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(ones);
+}
+
+void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_urshr8_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_urshr16_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_urshr32_i32,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_urshr64_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ if (shift == (8 << vece)) {
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Unsigned results in zero. With rounding, this produces a
+ * copy of the most significant bit.
+ */
+ tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
+static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 8) {
+ tcg_gen_vec_shr8i_i64(t, a, 7);
+ } else {
+ gen_urshr8_i64(t, a, sh);
+ }
+ tcg_gen_vec_add8_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 16) {
+ tcg_gen_vec_shr16i_i64(t, a, 15);
+ } else {
+ gen_urshr16_i64(t, a, sh);
+ }
+ tcg_gen_vec_add16_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ if (sh == 32) {
+ tcg_gen_shri_i32(t, a, 31);
+ } else {
+ gen_urshr32_i32(t, a, sh);
+ }
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 64) {
+ tcg_gen_shri_i64(t, a, 63);
+ } else {
+ gen_urshr64_i64(t, a, sh);
+ }
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ if (sh == (8 << vece)) {
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
+ } else {
+ gen_urshr_vec(vece, t, a, sh);
+ }
+ tcg_gen_add_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
+
+void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_ursra8_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni8 = gen_ursra16_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_ursra32_i32,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_ursra64_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+}
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
@@ -3998,47 +4247,62 @@ static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
- if (sh == 0) {
- tcg_gen_mov_vec(d, a);
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
+ tcg_gen_shri_vec(vece, t, a, sh);
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(m);
+}
+
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
+ const GVecGen2i ops[4] = {
+ { .fni8 = gen_shr8_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_shr16_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_shr32_ins_i32,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_shr64_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /* Shift of esize leaves destination unchanged. */
+ if (shift < (8 << vece)) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
} else {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
-
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
- tcg_gen_shri_vec(vece, t, a, sh);
- tcg_gen_and_vec(vece, d, d, m);
- tcg_gen_or_vec(vece, d, d, t);
-
- tcg_temp_free_vec(t);
- tcg_temp_free_vec(m);
- }
-}
-
-static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
-
-const GVecGen2i sri_op[4] = {
- { .fni8 = gen_shr8_ins_i64,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_8 },
- { .fni8 = gen_shr16_ins_i64,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_16 },
- { .fni4 = gen_shr32_ins_i32,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_32 },
- { .fni8 = gen_shr64_ins_i64,
- .fniv = gen_shr_ins_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_64 },
-};
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ }
+}
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
@@ -4076,47 +4340,60 @@ static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
- if (sh == 0) {
- tcg_gen_mov_vec(d, a);
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shli_vec(vece, t, a, sh);
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(m);
+}
+
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
+ const GVecGen2i ops[4] = {
+ { .fni8 = gen_shl8_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_shl16_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_shl32_ins_i32,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_shl64_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [0..esize-1]. */
+ tcg_debug_assert(shift >= 0);
+ tcg_debug_assert(shift < (8 << vece));
+
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
} else {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
-
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
- tcg_gen_shli_vec(vece, t, a, sh);
- tcg_gen_and_vec(vece, d, d, m);
- tcg_gen_or_vec(vece, d, d, t);
-
- tcg_temp_free_vec(t);
- tcg_temp_free_vec(m);
- }
-}
-
-static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
-
-const GVecGen2i sli_op[4] = {
- { .fni8 = gen_shl8_ins_i64,
- .fniv = gen_shl_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_8 },
- { .fni8 = gen_shl16_ins_i64,
- .fniv = gen_shl_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_16 },
- { .fni4 = gen_shl32_ins_i32,
- .fniv = gen_shl_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_32 },
- { .fni8 = gen_shl64_ins_i64,
- .fniv = gen_shl_ins_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_64 },
-};
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
@@ -4181,62 +4458,69 @@ static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
* these tables are shared with AArch64 which does support them.
*/
+void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_mla8_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_mla16_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_mla32_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_mla64_i64,
+ .fniv = gen_mla_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
-static const TCGOpcode vecop_list_mla[] = {
- INDEX_op_mul_vec, INDEX_op_add_vec, 0
-};
-
-static const TCGOpcode vecop_list_mls[] = {
- INDEX_op_mul_vec, INDEX_op_sub_vec, 0
-};
-
-const GVecGen3 mla_op[4] = {
- { .fni4 = gen_mla8_i32,
- .fniv = gen_mla_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mla,
- .vece = MO_8 },
- { .fni4 = gen_mla16_i32,
- .fniv = gen_mla_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mla,
- .vece = MO_16 },
- { .fni4 = gen_mla32_i32,
- .fniv = gen_mla_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mla,
- .vece = MO_32 },
- { .fni8 = gen_mla64_i64,
- .fniv = gen_mla_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_mla,
- .vece = MO_64 },
-};
-
-const GVecGen3 mls_op[4] = {
- { .fni4 = gen_mls8_i32,
- .fniv = gen_mls_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mls,
- .vece = MO_8 },
- { .fni4 = gen_mls16_i32,
- .fniv = gen_mls_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mls,
- .vece = MO_16 },
- { .fni4 = gen_mls32_i32,
- .fniv = gen_mls_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mls,
- .vece = MO_32 },
- { .fni8 = gen_mls64_i64,
- .fniv = gen_mls_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_mls,
- .vece = MO_64 },
-};
+void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_mls8_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_mls16_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_mls32_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_mls64_i64,
+ .fniv = gen_mls_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
/* CMTST : test is "if (X & Y != 0)". */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
@@ -4260,27 +4544,31 @@ static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}
-static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
-
-const GVecGen3 cmtst_op[4] = {
- { .fni4 = gen_helper_neon_tst_u8,
- .fniv = gen_cmtst_vec,
- .opt_opc = vecop_list_cmtst,
- .vece = MO_8 },
- { .fni4 = gen_helper_neon_tst_u16,
- .fniv = gen_cmtst_vec,
- .opt_opc = vecop_list_cmtst,
- .vece = MO_16 },
- { .fni4 = gen_cmtst_i32,
- .fniv = gen_cmtst_vec,
- .opt_opc = vecop_list_cmtst,
- .vece = MO_32 },
- { .fni8 = gen_cmtst_i64,
- .fniv = gen_cmtst_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list_cmtst,
- .vece = MO_64 },
-};
+void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_helper_neon_tst_u8,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_helper_neon_tst_u16,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_cmtst_i32,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_cmtst_i64,
+ .fniv = gen_cmtst_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
@@ -4398,29 +4686,33 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
tcg_temp_free_vec(rsh);
}
-static const TCGOpcode ushl_list[] = {
- INDEX_op_neg_vec, INDEX_op_shlv_vec,
- INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
-};
-
-const GVecGen3 ushl_op[4] = {
- { .fniv = gen_ushl_vec,
- .fno = gen_helper_gvec_ushl_b,
- .opt_opc = ushl_list,
- .vece = MO_8 },
- { .fniv = gen_ushl_vec,
- .fno = gen_helper_gvec_ushl_h,
- .opt_opc = ushl_list,
- .vece = MO_16 },
- { .fni4 = gen_ushl_i32,
- .fniv = gen_ushl_vec,
- .opt_opc = ushl_list,
- .vece = MO_32 },
- { .fni8 = gen_ushl_i64,
- .fniv = gen_ushl_vec,
- .opt_opc = ushl_list,
- .vece = MO_64 },
-};
+void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_neg_vec, INDEX_op_shlv_vec,
+ INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_ushl_vec,
+ .fno = gen_helper_gvec_ushl_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_ushl_vec,
+ .fno = gen_helper_gvec_ushl_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_ushl_i32,
+ .fniv = gen_ushl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_ushl_i64,
+ .fniv = gen_ushl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
@@ -4532,29 +4824,33 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
tcg_temp_free_vec(tmp);
}
-static const TCGOpcode sshl_list[] = {
- INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
- INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
-};
-
-const GVecGen3 sshl_op[4] = {
- { .fniv = gen_sshl_vec,
- .fno = gen_helper_gvec_sshl_b,
- .opt_opc = sshl_list,
- .vece = MO_8 },
- { .fniv = gen_sshl_vec,
- .fno = gen_helper_gvec_sshl_h,
- .opt_opc = sshl_list,
- .vece = MO_16 },
- { .fni4 = gen_sshl_i32,
- .fniv = gen_sshl_vec,
- .opt_opc = sshl_list,
- .vece = MO_32 },
- { .fni8 = gen_sshl_i64,
- .fniv = gen_sshl_vec,
- .opt_opc = sshl_list,
- .vece = MO_64 },
-};
+void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
+ INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_sshl_vec,
+ .fno = gen_helper_gvec_sshl_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_sshl_vec,
+ .fno = gen_helper_gvec_sshl_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_sshl_i32,
+ .fniv = gen_sshl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_sshl_i64,
+ .fniv = gen_sshl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
@@ -4567,32 +4863,37 @@ static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
tcg_temp_free_vec(x);
}
-static const TCGOpcode vecop_list_uqadd[] = {
- INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
-};
-
-const GVecGen4 uqadd_op[4] = {
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_b,
- .write_aofs = true,
- .opt_opc = vecop_list_uqadd,
- .vece = MO_8 },
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_h,
- .write_aofs = true,
- .opt_opc = vecop_list_uqadd,
- .vece = MO_16 },
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_s,
- .write_aofs = true,
- .opt_opc = vecop_list_uqadd,
- .vece = MO_32 },
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_d,
- .write_aofs = true,
- .opt_opc = vecop_list_uqadd,
- .vece = MO_64 },
-};
+void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_b,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_h,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_s,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_d,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
@@ -4605,32 +4906,37 @@ static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
tcg_temp_free_vec(x);
}
-static const TCGOpcode vecop_list_sqadd[] = {
- INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
-};
-
-const GVecGen4 sqadd_op[4] = {
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_b,
- .opt_opc = vecop_list_sqadd,
- .write_aofs = true,
- .vece = MO_8 },
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_h,
- .opt_opc = vecop_list_sqadd,
- .write_aofs = true,
- .vece = MO_16 },
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_s,
- .opt_opc = vecop_list_sqadd,
- .write_aofs = true,
- .vece = MO_32 },
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_d,
- .opt_opc = vecop_list_sqadd,
- .write_aofs = true,
- .vece = MO_64 },
-};
+void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
@@ -4643,32 +4949,37 @@ static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
tcg_temp_free_vec(x);
}
-static const TCGOpcode vecop_list_uqsub[] = {
- INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
-};
-
-const GVecGen4 uqsub_op[4] = {
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_b,
- .opt_opc = vecop_list_uqsub,
- .write_aofs = true,
- .vece = MO_8 },
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_h,
- .opt_opc = vecop_list_uqsub,
- .write_aofs = true,
- .vece = MO_16 },
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_s,
- .opt_opc = vecop_list_uqsub,
- .write_aofs = true,
- .vece = MO_32 },
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_d,
- .opt_opc = vecop_list_uqsub,
- .write_aofs = true,
- .vece = MO_64 },
-};
+void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
@@ -4681,32 +4992,275 @@ static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
tcg_temp_free_vec(x);
}
-static const TCGOpcode vecop_list_sqsub[] = {
- INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
-};
+void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
-const GVecGen4 sqsub_op[4] = {
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_b,
- .opt_opc = vecop_list_sqsub,
- .write_aofs = true,
- .vece = MO_8 },
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_h,
- .opt_opc = vecop_list_sqsub,
- .write_aofs = true,
- .vece = MO_16 },
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_s,
- .opt_opc = vecop_list_sqsub,
- .write_aofs = true,
- .vece = MO_32 },
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_d,
- .opt_opc = vecop_list_sqsub,
- .write_aofs = true,
- .vece = MO_64 },
-};
+static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_sub_i32(t, a, b);
+ tcg_gen_sub_i32(d, b, a);
+ tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(t, a, b);
+ tcg_gen_sub_i64(d, b, a);
+ tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_smin_vec(vece, t, a, b);
+ tcg_gen_smax_vec(vece, d, a, b);
+ tcg_gen_sub_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
+
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_sabd_i32,
+ .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_sabd_i64,
+ .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_sub_i32(t, a, b);
+ tcg_gen_sub_i32(d, b, a);
+ tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(t, a, b);
+ tcg_gen_sub_i64(d, b, a);
+ tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_umin_vec(vece, t, a, b);
+ tcg_gen_umax_vec(vece, d, a, b);
+ tcg_gen_sub_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
+
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_uabd_i32,
+ .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_uabd_i64,
+ .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+ gen_sabd_i32(t, a, b);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_sabd_i64(t, a, b);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ gen_sabd_vec(vece, t, a, b);
+ tcg_gen_add_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
+
+void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_add_vec,
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_saba_i32,
+ .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_saba_i64,
+ .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+ gen_uabd_i32(t, a, b);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_uabd_i64(t, a, b);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ gen_uabd_vec(vece, t, a, b);
+ tcg_gen_add_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
+
+void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_add_vec,
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_uaba_i32,
+ .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_uaba_i64,
+ .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
/* Translate a NEON data processing instruction. Return nonzero if the
instruction is invalid.
@@ -4722,12 +5276,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
int shift;
int pass;
int count;
- int pairwise;
int u;
int vec_size;
uint32_t imm;
TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
- TCGv_ptr ptr1, ptr2, ptr3;
+ TCGv_ptr ptr1, ptr2;
TCGv_i64 tmp64;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -4758,413 +5311,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
rm_ofs = neon_reg_offset(rm, 0);
if ((insn & (1 << 23)) == 0) {
- /* Three register same length. */
- op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
- /* Catch invalid op and bad size combinations: UNDEF */
- if ((neon_3r_sizes[op] & (1 << size)) == 0) {
- return 1;
- }
- /* All insns of this form UNDEF for either this condition or the
- * superset of cases "Q==1"; we catch the latter later.
- */
- if (q && ((rd | rn | rm) & 1)) {
- return 1;
- }
- switch (op) {
- case NEON_3R_SHA:
- /* The SHA-1/SHA-256 3-register instructions require special
- * treatment here, as their size field is overloaded as an
- * op type selector, and they all consume their input in a
- * single pass.
- */
- if (!q) {
- return 1;
- }
- if (!u) { /* SHA-1 */
- if (!dc_isar_feature(aa32_sha1, s)) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rn);
- ptr3 = vfp_reg_ptr(true, rm);
- tmp4 = tcg_const_i32(size);
- gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
- tcg_temp_free_i32(tmp4);
- } else { /* SHA-256 */
- if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rn);
- ptr3 = vfp_reg_ptr(true, rm);
- switch (size) {
- case 0:
- gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
- break;
- case 1:
- gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
- break;
- case 2:
- gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
- break;
- }
- }
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- tcg_temp_free_ptr(ptr3);
- return 0;
-
- case NEON_3R_VPADD_VQRDMLAH:
- if (!u) {
- break; /* VPADD */
- }
- /* VQRDMLAH */
- switch (size) {
- case 1:
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
- q, rd, rn, rm);
- case 2:
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
- q, rd, rn, rm);
- }
- return 1;
-
- case NEON_3R_VFM_VQRDMLSH:
- if (!u) {
- /* VFM, VFMS */
- if (size == 1) {
- return 1;
- }
- break;
- }
- /* VQRDMLSH */
- switch (size) {
- case 1:
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
- q, rd, rn, rm);
- case 2:
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
- q, rd, rn, rm);
- }
- return 1;
-
- case NEON_3R_VADD_VSUB:
- case NEON_3R_LOGIC:
- case NEON_3R_VMAX:
- case NEON_3R_VMIN:
- case NEON_3R_VTST_VCEQ:
- case NEON_3R_VCGT:
- case NEON_3R_VCGE:
- case NEON_3R_VQADD:
- case NEON_3R_VQSUB:
- case NEON_3R_VMUL:
- case NEON_3R_VML:
- case NEON_3R_VSHL:
- /* Already handled by decodetree */
- return 1;
- }
-
- if (size == 3) {
- /* 64-bit element instructions. */
- for (pass = 0; pass < (q ? 2 : 1); pass++) {
- neon_load_reg64(cpu_V0, rn + pass);
- neon_load_reg64(cpu_V1, rm + pass);
- switch (op) {
- case NEON_3R_VQSHL:
- if (u) {
- gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- }
- break;
- case NEON_3R_VRSHL:
- if (u) {
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
- }
- break;
- case NEON_3R_VQRSHL:
- if (u) {
- gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- }
- break;
- default:
- abort();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- return 0;
- }
- pairwise = 0;
- switch (op) {
- case NEON_3R_VQSHL:
- case NEON_3R_VRSHL:
- case NEON_3R_VQRSHL:
- {
- int rtmp;
- /* Shift instruction operands are reversed. */
- rtmp = rn;
- rn = rm;
- rm = rtmp;
- }
- break;
- case NEON_3R_VPADD_VQRDMLAH:
- case NEON_3R_VPMAX:
- case NEON_3R_VPMIN:
- pairwise = 1;
- break;
- case NEON_3R_FLOAT_ARITH:
- pairwise = (u && size < 2); /* if VPADD (float) */
- break;
- case NEON_3R_FLOAT_MINMAX:
- pairwise = u; /* if VPMIN/VPMAX (float) */
- break;
- case NEON_3R_FLOAT_CMP:
- if (!u && size) {
- /* no encoding for U=0 C=1x */
- return 1;
- }
- break;
- case NEON_3R_FLOAT_ACMP:
- if (!u) {
- return 1;
- }
- break;
- case NEON_3R_FLOAT_MISC:
- /* VMAXNM/VMINNM in ARMv8 */
- if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
- return 1;
- }
- break;
- case NEON_3R_VFM_VQRDMLSH:
- if (!dc_isar_feature(aa32_simdfmac, s)) {
- return 1;
- }
- break;
- default:
- break;
- }
-
- if (pairwise && q) {
- /* All the pairwise insns UNDEF if Q is set */
- return 1;
- }
-
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
-
- if (pairwise) {
- /* Pairwise. */
- if (pass < 1) {
- tmp = neon_load_reg(rn, 0);
- tmp2 = neon_load_reg(rn, 1);
- } else {
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- }
- } else {
- /* Elementwise. */
- tmp = neon_load_reg(rn, pass);
- tmp2 = neon_load_reg(rm, pass);
- }
- switch (op) {
- case NEON_3R_VHADD:
- GEN_NEON_INTEGER_OP(hadd);
- break;
- case NEON_3R_VRHADD:
- GEN_NEON_INTEGER_OP(rhadd);
- break;
- case NEON_3R_VHSUB:
- GEN_NEON_INTEGER_OP(hsub);
- break;
- case NEON_3R_VQSHL:
- GEN_NEON_INTEGER_OP_ENV(qshl);
- break;
- case NEON_3R_VRSHL:
- GEN_NEON_INTEGER_OP(rshl);
- break;
- case NEON_3R_VQRSHL:
- GEN_NEON_INTEGER_OP_ENV(qrshl);
- break;
- case NEON_3R_VABD:
- GEN_NEON_INTEGER_OP(abd);
- break;
- case NEON_3R_VABA:
- GEN_NEON_INTEGER_OP(abd);
- tcg_temp_free_i32(tmp2);
- tmp2 = neon_load_reg(rd, pass);
- gen_neon_add(size, tmp, tmp2);
- break;
- case NEON_3R_VPMAX:
- GEN_NEON_INTEGER_OP(pmax);
- break;
- case NEON_3R_VPMIN:
- GEN_NEON_INTEGER_OP(pmin);
- break;
- case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
- if (!u) { /* VQDMULH */
- switch (size) {
- case 1:
- gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
- break;
- default: abort();
- }
- } else { /* VQRDMULH */
- switch (size) {
- case 1:
- gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
- break;
- default: abort();
- }
- }
- break;
- case NEON_3R_VPADD_VQRDMLAH:
- switch (size) {
- case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- break;
- case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- switch ((u << 2) | size) {
- case 0: /* VADD */
- case 4: /* VPADD */
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
- break;
- case 2: /* VSUB */
- gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
- break;
- case 6: /* VABD */
- gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
- break;
- default:
- abort();
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MULTIPLY:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
- if (!u) {
- tcg_temp_free_i32(tmp2);
- tmp2 = neon_load_reg(rd, pass);
- if (size == 0) {
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
- }
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_CMP:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (!u) {
- gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
- } else {
- if (size == 0) {
- gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
- }
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_ACMP:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MINMAX:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MISC:
- if (u) {
- /* VMAXNM/VMINNM */
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- } else {
- if (size == 0) {
- gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
- } else {
- gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
- }
- }
- break;
- case NEON_3R_VFM_VQRDMLSH:
- {
- /* VFMA, VFMS: fused multiply-add */
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- TCGv_i32 tmp3 = neon_load_reg(rd, pass);
- if (size) {
- /* VFMS */
- gen_helper_vfp_negs(tmp, tmp);
- }
- gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
- tcg_temp_free_i32(tmp3);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- default:
- abort();
- }
- tcg_temp_free_i32(tmp2);
-
- /* Save the result. For elementwise operations we can put it
- straight into the destination register. For pairwise operations
- we have to be careful to avoid clobbering the source operands. */
- if (pairwise && rd == rm) {
- neon_store_scratch(pass, tmp);
- } else {
- neon_store_reg(rd, pass, tmp);
- }
-
- } /* for pass */
- if (pairwise && rd == rm) {
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
- tmp = neon_load_scratch(pass);
- neon_store_reg(rd, pass, tmp);
- }
- }
- /* End of 3 register same size operations. */
+ /* Three register same length: handled by decodetree */
+ return 1;
} else if (insn & (1 << 4)) {
if ((insn & 0x00380080) != 0) {
/* Two registers and shift. */
@@ -5220,19 +5368,36 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case 1: /* VSRA */
/* Right shift comes here negative. */
shift = -shift;
- /* Shifts larger than the element size are architecturally
- * valid. Unsigned results in all zeros; signed results
- * in all sign bits.
- */
- if (!u) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
- MIN(shift, (8 << size) - 1),
- &ssra_op[size]);
- } else if (shift >= 8 << size) {
- /* rd += 0 */
+ if (u) {
+ gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
} else {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
- shift, &usra_op[size]);
+ gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
+ }
+ return 0;
+
+ case 2: /* VRSHR */
+ /* Right shift comes here negative. */
+ shift = -shift;
+ if (u) {
+ gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
+ } else {
+ gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
+ }
+ return 0;
+
+ case 3: /* VRSRA */
+ /* Right shift comes here negative. */
+ shift = -shift;
+ if (u) {
+ gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
+ } else {
+ gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
}
return 0;
@@ -5242,31 +5407,17 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
/* Right shift comes here negative. */
shift = -shift;
- /* Shift out of range leaves destination unchanged. */
- if (shift < 8 << size) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
- shift, &sri_op[size]);
- }
+ gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
return 0;
case 5: /* VSHL, VSLI */
if (u) { /* VSLI */
- /* Shift out of range leaves destination unchanged. */
- if (shift < 8 << size) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
- vec_size, shift, &sli_op[size]);
- }
+ gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
} else { /* VSHL */
- /* Shifts larger than the element size are
- * architecturally valid and results in zero.
- */
- if (shift >= 8 << size) {
- tcg_gen_gvec_dup_imm(size, rd_ofs,
- vec_size, vec_size, 0);
- } else {
- tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- }
+ tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
}
return 0;
}
@@ -5287,13 +5438,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
neon_load_reg64(cpu_V0, rm + pass);
tcg_gen_movi_i64(cpu_V1, imm);
switch (op) {
- case 2: /* VRSHR */
- case 3: /* VRSRA */
- if (u)
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
- else
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
- break;
case 6: /* VQSHLU */
gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
cpu_V0, cpu_V1);
@@ -5310,11 +5454,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
default:
g_assert_not_reached();
}
- if (op == 3) {
- /* Accumulate. */
- neon_load_reg64(cpu_V1, rd + pass);
- tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
- }
neon_store_reg64(cpu_V0, rd + pass);
} else { /* size < 3 */
/* Operands in T0 and T1. */
@@ -5322,10 +5461,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
tmp2 = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp2, imm);
switch (op) {
- case 2: /* VRSHR */
- case 3: /* VRSRA */
- GEN_NEON_INTEGER_OP(rshl);
- break;
case 6: /* VQSHLU */
switch (size) {
case 0:
@@ -5351,13 +5486,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
g_assert_not_reached();
}
tcg_temp_free_i32(tmp2);
-
- if (op == 3) {
- /* Accumulate. */
- tmp2 = neon_load_reg(rd, pass);
- gen_neon_add(size, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
neon_store_reg(rd, pass, tmp);
}
} /* for pass */
@@ -6308,24 +6436,19 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
break;
case NEON_2RM_VCEQ0:
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
- vec_size, &ceq0_op[size]);
+ gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
case NEON_2RM_VCGT0:
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
- vec_size, &cgt0_op[size]);
+ gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
case NEON_2RM_VCLE0:
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
- vec_size, &cle0_op[size]);
+ gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
case NEON_2RM_VCGE0:
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
- vec_size, &cge0_op[size]);
+ gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
case NEON_2RM_VCLT0:
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
- vec_size, &clt0_op[size]);
+ gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
default:
@@ -6522,19 +6645,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
break;
}
case NEON_2RM_VRECPE:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_recpe_u32(tmp, tmp, fpstatus);
- tcg_temp_free_ptr(fpstatus);
+ gen_helper_recpe_u32(tmp, tmp);
break;
- }
case NEON_2RM_VRSQRTE:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
- tcg_temp_free_ptr(fpstatus);
+ gen_helper_rsqrte_u32(tmp, tmp);
break;
- }
case NEON_2RM_VRECPE_F:
{
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
diff --git a/target/arm/translate.h b/target/arm/translate.h
index cb7925ea46..c937dfe9bf 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -275,30 +275,78 @@ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex)
uint64_t vfp_expand_imm(int size, uint8_t imm8);
/* Vector operations shared between ARM and AArch64. */
-extern const GVecGen2 ceq0_op[4];
-extern const GVecGen2 clt0_op[4];
-extern const GVecGen2 cgt0_op[4];
-extern const GVecGen2 cle0_op[4];
-extern const GVecGen2 cge0_op[4];
-extern const GVecGen3 mla_op[4];
-extern const GVecGen3 mls_op[4];
-extern const GVecGen3 cmtst_op[4];
-extern const GVecGen3 sshl_op[4];
-extern const GVecGen3 ushl_op[4];
-extern const GVecGen2i ssra_op[4];
-extern const GVecGen2i usra_op[4];
-extern const GVecGen2i sri_op[4];
-extern const GVecGen2i sli_op[4];
-extern const GVecGen4 uqadd_op[4];
-extern const GVecGen4 sqadd_op[4];
-extern const GVecGen4 uqsub_op[4];
-extern const GVecGen4 sqsub_op[4];
+void gen_gvec_ceq0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_clt0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_cgt0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_cle0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_cge0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
/*
* Forward to the isar_feature_* tests given a DisasContext pointer.
*/
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index 3d534188a8..50a499299f 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -36,8 +36,6 @@
#define H4(x) (x)
#endif
-#define SET_QC() env->vfp.qc[0] = 1
-
static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
uint64_t *d = vd + opr_sz;
@@ -49,8 +47,8 @@ static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
}
/* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */
-static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
- int16_t src2, int16_t src3)
+static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2,
+ int16_t src3, uint32_t *sat)
{
/* Simplify:
* = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16
@@ -60,7 +58,7 @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
ret = ((int32_t)src3 << 15) + ret + (1 << 14);
ret >>= 15;
if (ret != (int16_t)ret) {
- SET_QC();
+ *sat = 1;
ret = (ret < 0 ? -0x8000 : 0x7fff);
}
return ret;
@@ -69,30 +67,30 @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1,
uint32_t src2, uint32_t src3)
{
- uint16_t e1 = inl_qrdmlah_s16(env, src1, src2, src3);
- uint16_t e2 = inl_qrdmlah_s16(env, src1 >> 16, src2 >> 16, src3 >> 16);
+ uint32_t *sat = &env->vfp.qc[0];
+ uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat);
+ uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
return deposit32(e1, 16, 16, e2);
}
void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm,
- void *ve, uint32_t desc)
+ void *vq, uint32_t desc)
{
uintptr_t opr_sz = simd_oprsz(desc);
int16_t *d = vd;
int16_t *n = vn;
int16_t *m = vm;
- CPUARMState *env = ve;
uintptr_t i;
for (i = 0; i < opr_sz / 2; ++i) {
- d[i] = inl_qrdmlah_s16(env, n[i], m[i], d[i]);
+ d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
/* Signed saturating rounding doubling multiply-subtract high half, 16-bit */
-static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
- int16_t src2, int16_t src3)
+static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2,
+ int16_t src3, uint32_t *sat)
{
/* Similarly, using subtraction:
* = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16
@@ -102,7 +100,7 @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
ret = ((int32_t)src3 << 15) - ret + (1 << 14);
ret >>= 15;
if (ret != (int16_t)ret) {
- SET_QC();
+ *sat = 1;
ret = (ret < 0 ? -0x8000 : 0x7fff);
}
return ret;
@@ -111,85 +109,97 @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1,
uint32_t src2, uint32_t src3)
{
- uint16_t e1 = inl_qrdmlsh_s16(env, src1, src2, src3);
- uint16_t e2 = inl_qrdmlsh_s16(env, src1 >> 16, src2 >> 16, src3 >> 16);
+ uint32_t *sat = &env->vfp.qc[0];
+ uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat);
+ uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
return deposit32(e1, 16, 16, e2);
}
void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm,
- void *ve, uint32_t desc)
+ void *vq, uint32_t desc)
{
uintptr_t opr_sz = simd_oprsz(desc);
int16_t *d = vd;
int16_t *n = vn;
int16_t *m = vm;
- CPUARMState *env = ve;
uintptr_t i;
for (i = 0; i < opr_sz / 2; ++i) {
- d[i] = inl_qrdmlsh_s16(env, n[i], m[i], d[i]);
+ d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
/* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */
-uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
- int32_t src2, int32_t src3)
+static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2,
+ int32_t src3, uint32_t *sat)
{
/* Simplify similarly to int_qrdmlah_s16 above. */
int64_t ret = (int64_t)src1 * src2;
ret = ((int64_t)src3 << 31) + ret + (1 << 30);
ret >>= 31;
if (ret != (int32_t)ret) {
- SET_QC();
+ *sat = 1;
ret = (ret < 0 ? INT32_MIN : INT32_MAX);
}
return ret;
}
+uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
+ int32_t src2, int32_t src3)
+{
+ uint32_t *sat = &env->vfp.qc[0];
+ return inl_qrdmlah_s32(src1, src2, src3, sat);
+}
+
void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
- void *ve, uint32_t desc)
+ void *vq, uint32_t desc)
{
uintptr_t opr_sz = simd_oprsz(desc);
int32_t *d = vd;
int32_t *n = vn;
int32_t *m = vm;
- CPUARMState *env = ve;
uintptr_t i;
for (i = 0; i < opr_sz / 4; ++i) {
- d[i] = helper_neon_qrdmlah_s32(env, n[i], m[i], d[i]);
+ d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
/* Signed saturating rounding doubling multiply-subtract high half, 32-bit */
-uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
- int32_t src2, int32_t src3)
+static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2,
+ int32_t src3, uint32_t *sat)
{
/* Simplify similarly to int_qrdmlsh_s16 above. */
int64_t ret = (int64_t)src1 * src2;
ret = ((int64_t)src3 << 31) - ret + (1 << 30);
ret >>= 31;
if (ret != (int32_t)ret) {
- SET_QC();
+ *sat = 1;
ret = (ret < 0 ? INT32_MIN : INT32_MAX);
}
return ret;
}
+uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
+ int32_t src2, int32_t src3)
+{
+ uint32_t *sat = &env->vfp.qc[0];
+ return inl_qrdmlsh_s32(src1, src2, src3, sat);
+}
+
void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
- void *ve, uint32_t desc)
+ void *vq, uint32_t desc)
{
uintptr_t opr_sz = simd_oprsz(desc);
int32_t *d = vd;
int32_t *n = vn;
int32_t *m = vm;
- CPUARMState *env = ve;
uintptr_t i;
for (i = 0; i < opr_sz / 4; ++i) {
- d[i] = helper_neon_qrdmlsh_s32(env, n[i], m[i], d[i]);
+ d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
@@ -681,6 +691,11 @@ static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat)
return result;
}
+static float32 float32_abd(float32 op1, float32 op2, float_status *stat)
+{
+ return float32_abs(float32_sub(op1, op2, stat));
+}
+
#define DO_3OP(NAME, FUNC, TYPE) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
{ \
@@ -708,6 +723,8 @@ DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16)
DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32)
DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
+DO_3OP(gvec_fabd_s, float32_abd, float32)
+
#ifdef TARGET_AARCH64
DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
@@ -737,6 +754,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
d[i + j] = TYPE##_mul(n[i + j], mm, stat); \
} \
} \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
}
DO_MUL_IDX(gvec_fmul_idx_h, float16, H2)
@@ -761,6 +779,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
mm, a[i + j], 0, stat); \
} \
} \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
}
DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
@@ -899,6 +918,119 @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn,
clear_tail(d, oprsz, simd_maxsz(desc));
}
+
+#define DO_SRA(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] += n[i] >> shift; \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_SRA(gvec_ssra_b, int8_t)
+DO_SRA(gvec_ssra_h, int16_t)
+DO_SRA(gvec_ssra_s, int32_t)
+DO_SRA(gvec_ssra_d, int64_t)
+
+DO_SRA(gvec_usra_b, uint8_t)
+DO_SRA(gvec_usra_h, uint16_t)
+DO_SRA(gvec_usra_s, uint32_t)
+DO_SRA(gvec_usra_d, uint64_t)
+
+#undef DO_SRA
+
+#define DO_RSHR(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ TYPE tmp = n[i] >> (shift - 1); \
+ d[i] = (tmp >> 1) + (tmp & 1); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_RSHR(gvec_srshr_b, int8_t)
+DO_RSHR(gvec_srshr_h, int16_t)
+DO_RSHR(gvec_srshr_s, int32_t)
+DO_RSHR(gvec_srshr_d, int64_t)
+
+DO_RSHR(gvec_urshr_b, uint8_t)
+DO_RSHR(gvec_urshr_h, uint16_t)
+DO_RSHR(gvec_urshr_s, uint32_t)
+DO_RSHR(gvec_urshr_d, uint64_t)
+
+#undef DO_RSHR
+
+#define DO_RSRA(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ TYPE tmp = n[i] >> (shift - 1); \
+ d[i] += (tmp >> 1) + (tmp & 1); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_RSRA(gvec_srsra_b, int8_t)
+DO_RSRA(gvec_srsra_h, int16_t)
+DO_RSRA(gvec_srsra_s, int32_t)
+DO_RSRA(gvec_srsra_d, int64_t)
+
+DO_RSRA(gvec_ursra_b, uint8_t)
+DO_RSRA(gvec_ursra_h, uint16_t)
+DO_RSRA(gvec_ursra_s, uint32_t)
+DO_RSRA(gvec_ursra_d, uint64_t)
+
+#undef DO_RSRA
+
+#define DO_SRI(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = deposit64(d[i], 0, sizeof(TYPE) * 8 - shift, n[i] >> shift); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_SRI(gvec_sri_b, uint8_t)
+DO_SRI(gvec_sri_h, uint16_t)
+DO_SRI(gvec_sri_s, uint32_t)
+DO_SRI(gvec_sri_d, uint64_t)
+
+#undef DO_SRI
+
+#define DO_SLI(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = deposit64(d[i], shift, sizeof(TYPE) * 8 - shift, n[i]); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_SLI(gvec_sli_b, uint8_t)
+DO_SLI(gvec_sli_h, uint16_t)
+DO_SLI(gvec_sli_s, uint32_t)
+DO_SLI(gvec_sli_d, uint64_t)
+
+#undef DO_SLI
+
/*
* Convert float16 to float32, raising no exceptions and
* preserving exceptional values, including SNaN.
@@ -1282,3 +1414,51 @@ DO_CMP0(gvec_cgt0_h, int16_t, >)
DO_CMP0(gvec_cge0_h, int16_t, >=)
#undef DO_CMP0
+
+#define DO_ABD(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ \
+ for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \
+ d[i] = n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \
+ } \
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
+}
+
+DO_ABD(gvec_sabd_b, int8_t)
+DO_ABD(gvec_sabd_h, int16_t)
+DO_ABD(gvec_sabd_s, int32_t)
+DO_ABD(gvec_sabd_d, int64_t)
+
+DO_ABD(gvec_uabd_b, uint8_t)
+DO_ABD(gvec_uabd_h, uint16_t)
+DO_ABD(gvec_uabd_s, uint32_t)
+DO_ABD(gvec_uabd_d, uint64_t)
+
+#undef DO_ABD
+
+#define DO_ABA(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ \
+ for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \
+ d[i] += n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \
+ } \
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
+}
+
+DO_ABA(gvec_saba_b, int8_t)
+DO_ABA(gvec_saba_h, int16_t)
+DO_ABA(gvec_saba_s, int32_t)
+DO_ABA(gvec_saba_d, int64_t)
+
+DO_ABA(gvec_uaba_b, uint8_t)
+DO_ABA(gvec_uaba_h, uint16_t)
+DO_ABA(gvec_uaba_s, uint32_t)
+DO_ABA(gvec_uaba_d, uint64_t)
+
+#undef DO_ABA
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 930d6e747f..88483d4066 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -581,7 +581,7 @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
#define float32_three make_float32(0x40400000)
#define float32_one_point_five make_float32(0x3fc00000)
-float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env)
+float32 HELPER(recps_f32)(CPUARMState *env, float32 a, float32 b)
{
float_status *s = &env->vfp.standard_fp_status;
if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
@@ -594,7 +594,7 @@ float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env)
return float32_sub(float32_two, float32_mul(a, b, s), s);
}
-float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
+float32 HELPER(rsqrts_f32)(CPUARMState *env, float32 a, float32 b)
{
float_status *s = &env->vfp.standard_fp_status;
float32 product;
@@ -1023,9 +1023,8 @@ float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
return make_float64(val);
}
-uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
+uint32_t HELPER(recpe_u32)(uint32_t a)
{
- /* float_status *s = fpstp; */
int input, estimate;
if ((a & 0x80000000) == 0) {
@@ -1038,7 +1037,7 @@ uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
return deposit32(0, (32 - 9), 9, estimate);
}
-uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
+uint32_t HELPER(rsqrte_u32)(uint32_t a)
{
int estimate;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index e818fc712a..408392dbf6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -29,6 +29,8 @@
/* The x86 has a strong memory model with some store-after-load re-ordering */
#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+#define KVM_HAVE_MCE_INJECTION 1
+
/* Maximum instruction code size */
#define TARGET_MAX_INSN_SIZE 16
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 4901c6dd74..34f838728d 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -24,7 +24,6 @@
#include "sysemu/sysemu.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm_int.h"
-#include "sysemu/reset.h"
#include "sysemu/runstate.h"
#include "kvm_i386.h"
#include "hyperv.h"
@@ -533,40 +532,6 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index)
}
}
-
-typedef struct HWPoisonPage {
- ram_addr_t ram_addr;
- QLIST_ENTRY(HWPoisonPage) list;
-} HWPoisonPage;
-
-static QLIST_HEAD(, HWPoisonPage) hwpoison_page_list =
- QLIST_HEAD_INITIALIZER(hwpoison_page_list);
-
-static void kvm_unpoison_all(void *param)
-{
- HWPoisonPage *page, *next_page;
-
- QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) {
- QLIST_REMOVE(page, list);
- qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE);
- g_free(page);
- }
-}
-
-static void kvm_hwpoison_page_add(ram_addr_t ram_addr)
-{
- HWPoisonPage *page;
-
- QLIST_FOREACH(page, &hwpoison_page_list, list) {
- if (page->ram_addr == ram_addr) {
- return;
- }
- }
- page = g_new(HWPoisonPage, 1);
- page->ram_addr = ram_addr;
- QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
-}
-
static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap,
int *max_banks)
{
@@ -2180,7 +2145,6 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
fprintf(stderr, "e820_add_entry() table is full\n");
return ret;
}
- qemu_register_reset(kvm_unpoison_all, NULL);
shadow_mem = object_property_get_int(OBJECT(s), "kvm-shadow-mem", &error_abort);
if (shadow_mem != -1) {