summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Maydell2020-06-16 14:36:31 +0200
committerPeter Maydell2020-06-16 14:36:31 +0200
commitcb8278cd997f4776b5a38fce7859bbe3b2d8d139 (patch)
treecf41c46b445f6ca9bfeb4c078a14d2ffbd98b159
parentMerge remote-tracking branch 'remotes/armbru/tags/pull-qom-2020-06-15' into s... (diff)
parenthw: arm: Set vendor property for IMX SDHCI emulations (diff)
downloadqemu-cb8278cd997f4776b5a38fce7859bbe3b2d8d139.tar.gz
qemu-cb8278cd997f4776b5a38fce7859bbe3b2d8d139.tar.xz
qemu-cb8278cd997f4776b5a38fce7859bbe3b2d8d139.zip
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200616' into staging
* hw: arm: Set vendor property for IMX SDHCI emulations * sd: sdhci: Implement basic vendor specific register support * hw/net/imx_fec: Convert debug fprintf() to trace events * target/arm/cpu: adjust virtual time for all KVM arm cpus * Implement configurable descriptor size in ftgmac100 * hw/misc/imx6ul_ccm: Implement non writable bits in CCM registers * target/arm: More Neon decodetree conversion work # gpg: Signature made Tue 16 Jun 2020 10:56:10 BST # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20200616: (23 commits) hw: arm: Set vendor property for IMX SDHCI emulations sd: sdhci: Implement basic vendor specific register support hw/net/imx_fec: Convert debug fprintf() to trace events target/arm/cpu: adjust virtual time for all KVM arm cpus Implement configurable descriptor size in ftgmac100 hw/misc/imx6ul_ccm: Implement non writable bits in CCM registers target/arm: Convert Neon VDUP (scalar) to decodetree target/arm: Convert Neon VTBL, VTBX to decodetree target/arm: Convert Neon VEXT to decodetree target/arm: Convert Neon 2-reg-scalar long multiplies to decodetree target/arm: Convert Neon 2-reg-scalar VQRDMLAH, VQRDMLSH to decodetree target/arm: Convert Neon 2-reg-scalar VQDMULH, VQRDMULH to decodetree target/arm: Convert Neon 2-reg-scalar float multiplies to decodetree target/arm: Convert Neon 2-reg-scalar integer multiplies to decodetree target/arm: Add missing TCG temp free in do_2shift_env_64() target/arm: Add 'static' and 'const' annotations to VSHLL function arrays target/arm: Convert Neon 3-reg-diff polynomial VMULL target/arm: Convert Neon 3-reg-diff saturating doubling 
multiplies target/arm: Convert Neon 3-reg-diff long multiplies target/arm: Convert Neon 3-reg-diff VABAL, VABDL to decodetree ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org> # Conflicts: # hw/arm/fsl-imx25.c # hw/arm/fsl-imx6.c # hw/arm/fsl-imx6ul.c # hw/arm/fsl-imx7.c
-rw-r--r--hw/arm/fsl-imx25.c6
-rw-r--r--hw/arm/fsl-imx6.c6
-rw-r--r--hw/arm/fsl-imx6ul.c2
-rw-r--r--hw/arm/fsl-imx7.c2
-rw-r--r--hw/misc/imx6ul_ccm.c76
-rw-r--r--hw/net/ftgmac100.c26
-rw-r--r--hw/net/imx_fec.c106
-rw-r--r--hw/net/trace-events18
-rw-r--r--hw/sd/sdhci-internal.h5
-rw-r--r--hw/sd/sdhci.c18
-rw-r--r--include/hw/sd/sdhci.h5
-rw-r--r--target/arm/cpu.c6
-rw-r--r--target/arm/cpu64.c1
-rw-r--r--target/arm/kvm.c21
-rw-r--r--target/arm/neon-dp.decode130
-rw-r--r--target/arm/translate-neon.inc.c1148
-rw-r--r--target/arm/translate.c684
-rw-r--r--target/arm/translate.h1
18 files changed, 1495 insertions, 766 deletions
diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
index fb516bdbac..f32f9bce0f 100644
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -263,6 +263,12 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp)
&err);
object_property_set_uint(OBJECT(&s->esdhc[i]), IMX25_ESDHC_CAPABILITIES,
"capareg", &err);
+ object_property_set_uint(OBJECT(&s->esdhc[i]), SDHCI_VENDOR_IMX,
+ "vendor", &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
sysbus_realize(SYS_BUS_DEVICE(&s->esdhc[i]), &err);
if (err) {
error_propagate(errp, err);
diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
index 17593485b7..d4bc4fae93 100644
--- a/hw/arm/fsl-imx6.c
+++ b/hw/arm/fsl-imx6.c
@@ -339,6 +339,12 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
&err);
object_property_set_uint(OBJECT(&s->esdhc[i]), IMX6_ESDHC_CAPABILITIES,
"capareg", &err);
+ object_property_set_uint(OBJECT(&s->esdhc[i]), SDHCI_VENDOR_IMX,
+ "vendor", &err);
+ if (err) {
+ error_propagate(errp, err);
+ return;
+ }
sysbus_realize(SYS_BUS_DEVICE(&s->esdhc[i]), &err);
if (err) {
error_propagate(errp, err);
diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c
index f8c564033e..6446034711 100644
--- a/hw/arm/fsl-imx6ul.c
+++ b/hw/arm/fsl-imx6ul.c
@@ -479,6 +479,8 @@ static void fsl_imx6ul_realize(DeviceState *dev, Error **errp)
FSL_IMX6UL_USDHC2_IRQ,
};
+ object_property_set_uint(OBJECT(&s->usdhc[i]), SDHCI_VENDOR_IMX,
+ "vendor", &error_abort);
sysbus_realize(SYS_BUS_DEVICE(&s->usdhc[i]), &error_abort);
sysbus_mmio_map(SYS_BUS_DEVICE(&s->usdhc[i]), 0,
diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c
index ca8b5cc358..b49d895a41 100644
--- a/hw/arm/fsl-imx7.c
+++ b/hw/arm/fsl-imx7.c
@@ -393,6 +393,8 @@ static void fsl_imx7_realize(DeviceState *dev, Error **errp)
FSL_IMX7_USDHC3_IRQ,
};
+ object_property_set_uint(OBJECT(&s->usdhc[i]), SDHCI_VENDOR_IMX,
+ "vendor", &error_abort);
sysbus_realize(SYS_BUS_DEVICE(&s->usdhc[i]), &error_abort);
sysbus_mmio_map(SYS_BUS_DEVICE(&s->usdhc[i]), 0,
diff --git a/hw/misc/imx6ul_ccm.c b/hw/misc/imx6ul_ccm.c
index a2fc1d0364..5e0661dacf 100644
--- a/hw/misc/imx6ul_ccm.c
+++ b/hw/misc/imx6ul_ccm.c
@@ -19,6 +19,62 @@
#include "trace.h"
+static const uint32_t ccm_mask[CCM_MAX] = {
+ [CCM_CCR] = 0xf01fef80,
+ [CCM_CCDR] = 0xfffeffff,
+ [CCM_CSR] = 0xffffffff,
+ [CCM_CCSR] = 0xfffffef2,
+ [CCM_CACRR] = 0xfffffff8,
+ [CCM_CBCDR] = 0xc1f8e000,
+ [CCM_CBCMR] = 0xfc03cfff,
+ [CCM_CSCMR1] = 0x80700000,
+ [CCM_CSCMR2] = 0xe01ff003,
+ [CCM_CSCDR1] = 0xfe00c780,
+ [CCM_CS1CDR] = 0xfe00fe00,
+ [CCM_CS2CDR] = 0xf8007000,
+ [CCM_CDCDR] = 0xf00fffff,
+ [CCM_CHSCCDR] = 0xfffc01ff,
+ [CCM_CSCDR2] = 0xfe0001ff,
+ [CCM_CSCDR3] = 0xffffc1ff,
+ [CCM_CDHIPR] = 0xffffffff,
+ [CCM_CTOR] = 0x00000000,
+ [CCM_CLPCR] = 0xf39ff01c,
+ [CCM_CISR] = 0xfb85ffbe,
+ [CCM_CIMR] = 0xfb85ffbf,
+ [CCM_CCOSR] = 0xfe00fe00,
+ [CCM_CGPR] = 0xfffc3fea,
+ [CCM_CCGR0] = 0x00000000,
+ [CCM_CCGR1] = 0x00000000,
+ [CCM_CCGR2] = 0x00000000,
+ [CCM_CCGR3] = 0x00000000,
+ [CCM_CCGR4] = 0x00000000,
+ [CCM_CCGR5] = 0x00000000,
+ [CCM_CCGR6] = 0x00000000,
+ [CCM_CMEOR] = 0xafffff1f,
+};
+
+static const uint32_t analog_mask[CCM_ANALOG_MAX] = {
+ [CCM_ANALOG_PLL_ARM] = 0xfff60f80,
+ [CCM_ANALOG_PLL_USB1] = 0xfffe0fbc,
+ [CCM_ANALOG_PLL_USB2] = 0xfffe0fbc,
+ [CCM_ANALOG_PLL_SYS] = 0xfffa0ffe,
+ [CCM_ANALOG_PLL_SYS_SS] = 0x00000000,
+ [CCM_ANALOG_PLL_SYS_NUM] = 0xc0000000,
+ [CCM_ANALOG_PLL_SYS_DENOM] = 0xc0000000,
+ [CCM_ANALOG_PLL_AUDIO] = 0xffe20f80,
+ [CCM_ANALOG_PLL_AUDIO_NUM] = 0xc0000000,
+ [CCM_ANALOG_PLL_AUDIO_DENOM] = 0xc0000000,
+ [CCM_ANALOG_PLL_VIDEO] = 0xffe20f80,
+ [CCM_ANALOG_PLL_VIDEO_NUM] = 0xc0000000,
+ [CCM_ANALOG_PLL_VIDEO_DENOM] = 0xc0000000,
+ [CCM_ANALOG_PLL_ENET] = 0xffc20ff0,
+ [CCM_ANALOG_PFD_480] = 0x40404040,
+ [CCM_ANALOG_PFD_528] = 0x40404040,
+ [PMU_MISC0] = 0x01fe8306,
+ [PMU_MISC1] = 0x07fcede0,
+ [PMU_MISC2] = 0x005f5f5f,
+};
+
static const char *imx6ul_ccm_reg_name(uint32_t reg)
{
static char unknown[20];
@@ -596,11 +652,8 @@ static void imx6ul_ccm_write(void *opaque, hwaddr offset, uint64_t value,
trace_ccm_write_reg(imx6ul_ccm_reg_name(index), (uint32_t)value);
- /*
- * We will do a better implementation later. In particular some bits
- * cannot be written to.
- */
- s->ccm[index] = (uint32_t)value;
+ s->ccm[index] = (s->ccm[index] & ccm_mask[index]) |
+ ((uint32_t)value & ~ccm_mask[index]);
}
static uint64_t imx6ul_analog_read(void *opaque, hwaddr offset, unsigned size)
@@ -737,7 +790,7 @@ static void imx6ul_analog_write(void *opaque, hwaddr offset, uint64_t value,
* the REG_NAME register. So we change the value of the
* REG_NAME register, setting bits passed in the value.
*/
- s->analog[index - 1] |= value;
+ s->analog[index - 1] |= (value & ~analog_mask[index - 1]);
break;
case CCM_ANALOG_PLL_ARM_CLR:
case CCM_ANALOG_PLL_USB1_CLR:
@@ -762,7 +815,7 @@ static void imx6ul_analog_write(void *opaque, hwaddr offset, uint64_t value,
* the REG_NAME register. So we change the value of the
* REG_NAME register, unsetting bits passed in the value.
*/
- s->analog[index - 2] &= ~value;
+ s->analog[index - 2] &= ~(value & ~analog_mask[index - 2]);
break;
case CCM_ANALOG_PLL_ARM_TOG:
case CCM_ANALOG_PLL_USB1_TOG:
@@ -787,14 +840,11 @@ static void imx6ul_analog_write(void *opaque, hwaddr offset, uint64_t value,
* the REG_NAME register. So we change the value of the
* REG_NAME register, toggling bits passed in the value.
*/
- s->analog[index - 3] ^= value;
+ s->analog[index - 3] ^= (value & ~analog_mask[index - 3]);
break;
default:
- /*
- * We will do a better implementation later. In particular some bits
- * cannot be written to.
- */
- s->analog[index] = value;
+ s->analog[index] = (s->analog[index] & analog_mask[index]) |
+ (value & ~analog_mask[index]);
break;
}
}
diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index 25ebee7ec2..043ba61b86 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -80,6 +80,16 @@
#define FTGMAC100_APTC_TXPOLL_TIME_SEL (1 << 12)
/*
+ * DMA burst length and arbitration control register
+ */
+#define FTGMAC100_DBLAC_RXBURST_SIZE(x) (((x) >> 8) & 0x3)
+#define FTGMAC100_DBLAC_TXBURST_SIZE(x) (((x) >> 10) & 0x3)
+#define FTGMAC100_DBLAC_RXDES_SIZE(x) ((((x) >> 12) & 0xf) * 8)
+#define FTGMAC100_DBLAC_TXDES_SIZE(x) ((((x) >> 16) & 0xf) * 8)
+#define FTGMAC100_DBLAC_IFG_CNT(x) (((x) >> 20) & 0x7)
+#define FTGMAC100_DBLAC_IFG_INC (1 << 23)
+
+/*
* PHY control register
*/
#define FTGMAC100_PHYCR_MIIRD (1 << 26)
@@ -553,7 +563,7 @@ static void ftgmac100_do_tx(FTGMAC100State *s, uint32_t tx_ring,
if (bd.des0 & s->txdes0_edotr) {
addr = tx_ring;
} else {
- addr += sizeof(FTGMAC100Desc);
+ addr += FTGMAC100_DBLAC_TXDES_SIZE(s->dblac);
}
}
@@ -800,6 +810,18 @@ static void ftgmac100_write(void *opaque, hwaddr addr,
s->phydata = value & 0xffff;
break;
case FTGMAC100_DBLAC: /* DMA Burst Length and Arbitration Control */
+ if (FTGMAC100_DBLAC_TXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: transmit descriptor too small : %d bytes\n",
+ __func__, FTGMAC100_DBLAC_TXDES_SIZE(s->dblac));
+ break;
+ }
+ if (FTGMAC100_DBLAC_RXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: receive descriptor too small : %d bytes\n",
+ __func__, FTGMAC100_DBLAC_RXDES_SIZE(s->dblac));
+ break;
+ }
s->dblac = value;
break;
case FTGMAC100_REVR: /* Feature Register */
@@ -982,7 +1004,7 @@ static ssize_t ftgmac100_receive(NetClientState *nc, const uint8_t *buf,
if (bd.des0 & s->rxdes0_edorr) {
addr = s->rx_ring;
} else {
- addr += sizeof(FTGMAC100Desc);
+ addr += FTGMAC100_DBLAC_RXDES_SIZE(s->dblac);
}
}
s->rx_descriptor = addr;
diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
index 7adcc9df65..eefedc252d 100644
--- a/hw/net/imx_fec.c
+++ b/hw/net/imx_fec.c
@@ -31,34 +31,11 @@
#include "qemu/module.h"
#include "net/checksum.h"
#include "net/eth.h"
+#include "trace.h"
/* For crc32 */
#include <zlib.h>
-#ifndef DEBUG_IMX_FEC
-#define DEBUG_IMX_FEC 0
-#endif
-
-#define FEC_PRINTF(fmt, args...) \
- do { \
- if (DEBUG_IMX_FEC) { \
- fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_FEC, \
- __func__, ##args); \
- } \
- } while (0)
-
-#ifndef DEBUG_IMX_PHY
-#define DEBUG_IMX_PHY 0
-#endif
-
-#define PHY_PRINTF(fmt, args...) \
- do { \
- if (DEBUG_IMX_PHY) { \
- fprintf(stderr, "[%s.phy]%s: " fmt , TYPE_IMX_FEC, \
- __func__, ##args); \
- } \
- } while (0)
-
#define IMX_MAX_DESC 1024
static const char *imx_default_reg_name(IMXFECState *s, uint32_t index)
@@ -262,43 +239,45 @@ static void imx_eth_update(IMXFECState *s);
* For now we don't handle any GPIO/interrupt line, so the OS will
* have to poll for the PHY status.
*/
-static void phy_update_irq(IMXFECState *s)
+static void imx_phy_update_irq(IMXFECState *s)
{
imx_eth_update(s);
}
-static void phy_update_link(IMXFECState *s)
+static void imx_phy_update_link(IMXFECState *s)
{
/* Autonegotiation status mirrors link status. */
if (qemu_get_queue(s->nic)->link_down) {
- PHY_PRINTF("link is down\n");
+ trace_imx_phy_update_link("down");
s->phy_status &= ~0x0024;
s->phy_int |= PHY_INT_DOWN;
} else {
- PHY_PRINTF("link is up\n");
+ trace_imx_phy_update_link("up");
s->phy_status |= 0x0024;
s->phy_int |= PHY_INT_ENERGYON;
s->phy_int |= PHY_INT_AUTONEG_COMPLETE;
}
- phy_update_irq(s);
+ imx_phy_update_irq(s);
}
static void imx_eth_set_link(NetClientState *nc)
{
- phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc)));
+ imx_phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc)));
}
-static void phy_reset(IMXFECState *s)
+static void imx_phy_reset(IMXFECState *s)
{
+ trace_imx_phy_reset();
+
s->phy_status = 0x7809;
s->phy_control = 0x3000;
s->phy_advertise = 0x01e1;
s->phy_int_mask = 0;
s->phy_int = 0;
- phy_update_link(s);
+ imx_phy_update_link(s);
}
-static uint32_t do_phy_read(IMXFECState *s, int reg)
+static uint32_t imx_phy_read(IMXFECState *s, int reg)
{
uint32_t val;
@@ -332,7 +311,7 @@ static uint32_t do_phy_read(IMXFECState *s, int reg)
case 29: /* Interrupt source. */
val = s->phy_int;
s->phy_int = 0;
- phy_update_irq(s);
+ imx_phy_update_irq(s);
break;
case 30: /* Interrupt mask */
val = s->phy_int_mask;
@@ -352,14 +331,14 @@ static uint32_t do_phy_read(IMXFECState *s, int reg)
break;
}
- PHY_PRINTF("read 0x%04x @ %d\n", val, reg);
+ trace_imx_phy_read(val, reg);
return val;
}
-static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
+static void imx_phy_write(IMXFECState *s, int reg, uint32_t val)
{
- PHY_PRINTF("write 0x%04x @ %d\n", val, reg);
+ trace_imx_phy_write(val, reg);
if (reg > 31) {
/* we only advertise one phy */
@@ -369,7 +348,7 @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
switch (reg) {
case 0: /* Basic Control */
if (val & 0x8000) {
- phy_reset(s);
+ imx_phy_reset(s);
} else {
s->phy_control = val & 0x7980;
/* Complete autonegotiation immediately. */
@@ -383,7 +362,7 @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
break;
case 30: /* Interrupt mask */
s->phy_int_mask = val & 0xff;
- phy_update_irq(s);
+ imx_phy_update_irq(s);
break;
case 17:
case 18:
@@ -402,6 +381,8 @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
static void imx_fec_read_bd(IMXFECBufDesc *bd, dma_addr_t addr)
{
dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd));
+
+ trace_imx_fec_read_bd(addr, bd->flags, bd->length, bd->data);
}
static void imx_fec_write_bd(IMXFECBufDesc *bd, dma_addr_t addr)
@@ -412,6 +393,9 @@ static void imx_fec_write_bd(IMXFECBufDesc *bd, dma_addr_t addr)
static void imx_enet_read_bd(IMXENETBufDesc *bd, dma_addr_t addr)
{
dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd));
+
+ trace_imx_enet_read_bd(addr, bd->flags, bd->length, bd->data,
+ bd->option, bd->status);
}
static void imx_enet_write_bd(IMXENETBufDesc *bd, dma_addr_t addr)
@@ -471,11 +455,11 @@ static void imx_fec_do_tx(IMXFECState *s)
int len;
imx_fec_read_bd(&bd, addr);
- FEC_PRINTF("tx_bd %x flags %04x len %d data %08x\n",
- addr, bd.flags, bd.length, bd.data);
if ((bd.flags & ENET_BD_R) == 0) {
+
/* Run out of descriptors to transmit. */
- FEC_PRINTF("tx_bd ran out of descriptors to transmit\n");
+ trace_imx_eth_tx_bd_busy();
+
break;
}
len = bd.length;
@@ -552,11 +536,11 @@ static void imx_enet_do_tx(IMXFECState *s, uint32_t index)
int len;
imx_enet_read_bd(&bd, addr);
- FEC_PRINTF("tx_bd %x flags %04x len %d data %08x option %04x "
- "status %04x\n", addr, bd.flags, bd.length, bd.data,
- bd.option, bd.status);
if ((bd.flags & ENET_BD_R) == 0) {
/* Run out of descriptors to transmit. */
+
+ trace_imx_eth_tx_bd_busy();
+
break;
}
len = bd.length;
@@ -633,7 +617,7 @@ static void imx_eth_enable_rx(IMXFECState *s, bool flush)
s->regs[ENET_RDAR] = (bd.flags & ENET_BD_E) ? ENET_RDAR_RDAR : 0;
if (!s->regs[ENET_RDAR]) {
- FEC_PRINTF("RX buffer full\n");
+ trace_imx_eth_rx_bd_full();
} else if (flush) {
qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
@@ -676,7 +660,7 @@ static void imx_eth_reset(DeviceState *d)
memset(s->tx_descriptor, 0, sizeof(s->tx_descriptor));
/* We also reset the PHY */
- phy_reset(s);
+ imx_phy_reset(s);
}
static uint32_t imx_default_read(IMXFECState *s, uint32_t index)
@@ -774,8 +758,7 @@ static uint64_t imx_eth_read(void *opaque, hwaddr offset, unsigned size)
break;
}
- FEC_PRINTF("reg[%s] => 0x%" PRIx32 "\n", imx_eth_reg_name(s, index),
- value);
+ trace_imx_eth_read(index, imx_eth_reg_name(s, index), value);
return value;
}
@@ -884,8 +867,7 @@ static void imx_eth_write(void *opaque, hwaddr offset, uint64_t value,
const bool single_tx_ring = !imx_eth_is_multi_tx_ring(s);
uint32_t index = offset >> 2;
- FEC_PRINTF("reg[%s] <= 0x%" PRIx32 "\n", imx_eth_reg_name(s, index),
- (uint32_t)value);
+ trace_imx_eth_write(index, imx_eth_reg_name(s, index), value);
switch (index) {
case ENET_EIR:
@@ -940,12 +922,12 @@ static void imx_eth_write(void *opaque, hwaddr offset, uint64_t value,
if (extract32(value, 29, 1)) {
/* This is a read operation */
s->regs[ENET_MMFR] = deposit32(s->regs[ENET_MMFR], 0, 16,
- do_phy_read(s,
+ imx_phy_read(s,
extract32(value,
18, 10)));
} else {
/* This is a write operation */
- do_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16));
+ imx_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16));
}
/* raise the interrupt as the PHY operation is done */
s->regs[ENET_EIR] |= ENET_INT_MII;
@@ -1053,8 +1035,6 @@ static bool imx_eth_can_receive(NetClientState *nc)
{
IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc));
- FEC_PRINTF("\n");
-
return !!s->regs[ENET_RDAR];
}
@@ -1071,7 +1051,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
unsigned int buf_len;
size_t size = len;
- FEC_PRINTF("len %d\n", (int)size);
+ trace_imx_fec_receive(size);
if (!s->regs[ENET_RDAR]) {
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n",
@@ -1113,7 +1093,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
bd.length = buf_len;
size -= buf_len;
- FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length);
+ trace_imx_fec_receive_len(addr, bd.length);
/* The last 4 bytes are the CRC. */
if (size < 4) {
@@ -1131,7 +1111,9 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
if (size == 0) {
/* Last buffer in frame. */
bd.flags |= flags | ENET_BD_L;
- FEC_PRINTF("rx frame flags %04x\n", bd.flags);
+
+ trace_imx_fec_receive_last(bd.flags);
+
s->regs[ENET_EIR] |= ENET_INT_RXF;
} else {
s->regs[ENET_EIR] |= ENET_INT_RXB;
@@ -1164,7 +1146,7 @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf,
size_t size = len;
bool shift16 = s->regs[ENET_RACC] & ENET_RACC_SHIFT16;
- FEC_PRINTF("len %d\n", (int)size);
+ trace_imx_enet_receive(size);
if (!s->regs[ENET_RDAR]) {
qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n",
@@ -1210,7 +1192,7 @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf,
bd.length = buf_len;
size -= buf_len;
- FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length);
+ trace_imx_enet_receive_len(addr, bd.length);
/* The last 4 bytes are the CRC. */
if (size < 4) {
@@ -1246,7 +1228,9 @@ static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf,
if (size == 0) {
/* Last buffer in frame. */
bd.flags |= flags | ENET_BD_L;
- FEC_PRINTF("rx frame flags %04x\n", bd.flags);
+
+ trace_imx_enet_receive_last(bd.flags);
+
/* Indicate that we've updated the last buffer descriptor. */
bd.last_buffer = ENET_BD_BDU;
if (bd.option & ENET_BD_RX_INT) {
diff --git a/hw/net/trace-events b/hw/net/trace-events
index e18f883cfd..26700dad99 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -408,3 +408,21 @@ i82596_receive_packet(size_t sz) "len=%zu"
i82596_new_mac(const char *id_with_mac) "New MAC for: %s"
i82596_set_multicast(uint16_t count) "Added %d multicast entries"
i82596_channel_attention(void *s) "%p: Received CHANNEL ATTENTION"
+
+# imx_fec.c
+imx_phy_read(uint32_t val, int reg) "0x%04"PRIx32" <= reg[%d]"
+imx_phy_write(uint32_t val, int reg) "0x%04"PRIx32" => reg[%d]"
+imx_phy_update_link(const char *s) "%s"
+imx_phy_reset(void) ""
+imx_fec_read_bd(uint64_t addr, int flags, int len, int data) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x"
+imx_enet_read_bd(uint64_t addr, int flags, int len, int data, int options, int status) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x option 0x%04x status 0x%04x"
+imx_eth_tx_bd_busy(void) "tx_bd ran out of descriptors to transmit"
+imx_eth_rx_bd_full(void) "RX buffer is full"
+imx_eth_read(int reg, const char *reg_name, uint32_t value) "reg[%d:%s] => 0x%08"PRIx32
+imx_eth_write(int reg, const char *reg_name, uint64_t value) "reg[%d:%s] <= 0x%08"PRIx64
+imx_fec_receive(size_t size) "len %zu"
+imx_fec_receive_len(uint64_t addr, int len) "rx_bd 0x%"PRIx64" length %d"
+imx_fec_receive_last(int last) "rx frame flags 0x%04x"
+imx_enet_receive(size_t size) "len %zu"
+imx_enet_receive_len(uint64_t addr, int len) "rx_bd 0x%"PRIx64" length %d"
+imx_enet_receive_last(int last) "rx frame flags 0x%04x"
diff --git a/hw/sd/sdhci-internal.h b/hw/sd/sdhci-internal.h
index e7c8a523b5..e8c753d6d1 100644
--- a/hw/sd/sdhci-internal.h
+++ b/hw/sd/sdhci-internal.h
@@ -75,6 +75,7 @@
#define SDHC_CMD_INHIBIT 0x00000001
#define SDHC_DATA_INHIBIT 0x00000002
#define SDHC_DAT_LINE_ACTIVE 0x00000004
+#define SDHC_IMX_CLOCK_GATE_OFF 0x00000080
#define SDHC_DOING_WRITE 0x00000100
#define SDHC_DOING_READ 0x00000200
#define SDHC_SPACE_AVAILABLE 0x00000400
@@ -289,7 +290,10 @@ extern const VMStateDescription sdhci_vmstate;
#define ESDHC_MIX_CTRL 0x48
+
#define ESDHC_VENDOR_SPEC 0xc0
+#define ESDHC_IMX_FRC_SDCLK_ON (1 << 8)
+
#define ESDHC_DLL_CTRL 0x60
#define ESDHC_TUNING_CTRL 0xcc
@@ -326,6 +330,7 @@ extern const VMStateDescription sdhci_vmstate;
#define DEFINE_SDHCI_COMMON_PROPERTIES(_state) \
DEFINE_PROP_UINT8("sd-spec-version", _state, sd_spec_version, 2), \
DEFINE_PROP_UINT8("uhs", _state, uhs_mode, UHS_NOT_SUPPORTED), \
+ DEFINE_PROP_UINT8("vendor", _state, vendor, SDHCI_VENDOR_NONE), \
\
/* Capabilities registers provide information on supported
* features of this specific host controller implementation */ \
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 1b75d7bab9..eb2be6529e 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -1569,11 +1569,13 @@ static uint64_t usdhc_read(void *opaque, hwaddr offset, unsigned size)
}
break;
+ case ESDHC_VENDOR_SPEC:
+ ret = s->vendor_spec;
+ break;
case ESDHC_DLL_CTRL:
case ESDHC_TUNE_CTRL_STATUS:
case ESDHC_UNDOCUMENTED_REG27:
case ESDHC_TUNING_CTRL:
- case ESDHC_VENDOR_SPEC:
case ESDHC_MIX_CTRL:
case ESDHC_WTMK_LVL:
ret = 0;
@@ -1596,7 +1598,21 @@ usdhc_write(void *opaque, hwaddr offset, uint64_t val, unsigned size)
case ESDHC_UNDOCUMENTED_REG27:
case ESDHC_TUNING_CTRL:
case ESDHC_WTMK_LVL:
+ break;
+
case ESDHC_VENDOR_SPEC:
+ s->vendor_spec = value;
+ switch (s->vendor) {
+ case SDHCI_VENDOR_IMX:
+ if (value & ESDHC_IMX_FRC_SDCLK_ON) {
+ s->prnsts &= ~SDHC_IMX_CLOCK_GATE_OFF;
+ } else {
+ s->prnsts |= SDHC_IMX_CLOCK_GATE_OFF;
+ }
+ break;
+ default:
+ break;
+ }
break;
case SDHC_HOSTCTL:
diff --git a/include/hw/sd/sdhci.h b/include/hw/sd/sdhci.h
index c6868c9699..5d9275f3d6 100644
--- a/include/hw/sd/sdhci.h
+++ b/include/hw/sd/sdhci.h
@@ -74,6 +74,7 @@ typedef struct SDHCIState {
uint16_t acmd12errsts; /* Auto CMD12 error status register */
uint16_t hostctl2; /* Host Control 2 */
uint64_t admasysaddr; /* ADMA System Address Register */
+ uint16_t vendor_spec; /* Vendor specific register */
/* Read-only registers */
uint64_t capareg; /* Capabilities Register */
@@ -96,8 +97,12 @@ typedef struct SDHCIState {
uint32_t quirks;
uint8_t sd_spec_version;
uint8_t uhs_mode;
+ uint8_t vendor; /* For vendor specific functionality */
} SDHCIState;
+#define SDHCI_VENDOR_NONE 0
+#define SDHCI_VENDOR_IMX 1
+
/*
* Controller does not provide transfer-complete interrupt when not
* busy.
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 32bec156f2..5b7a36b5d7 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1245,6 +1245,10 @@ void arm_cpu_post_init(Object *obj)
if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) {
qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property);
}
+
+ if (kvm_enabled()) {
+ kvm_arm_add_vcpu_properties(obj);
+ }
}
static void arm_cpu_finalizefn(Object *obj)
@@ -2029,7 +2033,6 @@ static void arm_max_initfn(Object *obj)
if (kvm_enabled()) {
kvm_arm_set_cpu_features_from_host(cpu);
- kvm_arm_add_vcpu_properties(obj);
} else {
cortex_a15_initfn(obj);
@@ -2183,7 +2186,6 @@ static void arm_host_initfn(Object *obj)
if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
aarch64_add_sve_properties(obj);
}
- kvm_arm_add_vcpu_properties(obj);
arm_cpu_post_init(obj);
}
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index cbc5c3868f..778cecc2e6 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -592,7 +592,6 @@ static void aarch64_max_initfn(Object *obj)
if (kvm_enabled()) {
kvm_arm_set_cpu_features_from_host(cpu);
- kvm_arm_add_vcpu_properties(obj);
} else {
uint64_t t;
uint32_t u;
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 4bdbe6dcac..eef3bbd1cc 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -194,17 +194,18 @@ static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp)
/* KVM VCPU properties should be prefixed with "kvm-". */
void kvm_arm_add_vcpu_properties(Object *obj)
{
- if (!kvm_enabled()) {
- return;
- }
+ ARMCPU *cpu = ARM_CPU(obj);
+ CPUARMState *env = &cpu->env;
- ARM_CPU(obj)->kvm_adjvtime = true;
- object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get,
- kvm_no_adjvtime_set);
- object_property_set_description(obj, "kvm-no-adjvtime",
- "Set on to disable the adjustment of "
- "the virtual counter. VM stopped time "
- "will be counted.");
+ if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) {
+ cpu->kvm_adjvtime = true;
+ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get,
+ kvm_no_adjvtime_set);
+ object_property_set_description(obj, "kvm-no-adjvtime",
+ "Set on to disable the adjustment of "
+ "the virtual counter. VM stopped time "
+ "will be counted.");
+ }
}
bool kvm_arm_pmu_supported(CPUState *cpu)
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index bd1b0e13f7..6d890b2161 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -397,3 +397,133 @@ VCVT_FU_2sh 1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
# So we have a single decode line and check the cmode/op in the
# trans function.
Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
+
+######################################################################
+# Within the "two registers, or three registers of different lengths"
+# grouping ([23,4]=0b10), bits [21:20] are either part of the opcode
+# decode: 0b11 for VEXT, two-reg-misc, VTBL, and duplicate-scalar;
+# or they are a size field for the three-reg-different-lengths and
+# two-reg-and-scalar insn groups (where size cannot be 0b11). This
+# is slightly awkward for decodetree: we handle it with this
+# non-exclusive group which contains within it two exclusive groups:
+# one for the size=0b11 patterns, and one for the size-not-0b11
+# patterns. This allows us to check that none of the insns within
+# each subgroup accidentally overlap each other. Note that all the
+# trans functions for the size-not-0b11 patterns must check and
+# return false for size==3.
+######################################################################
+{
+ [
+ ##################################################################
+ # Miscellaneous size=0b11 insns
+ ##################################################################
+ VEXT 1111 001 0 1 . 11 .... .... imm:4 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+ VTBL 1111 001 1 1 . 11 .... .... 10 len:2 . op:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+ VDUP_scalar 1111 001 1 1 . 11 index:3 1 .... 11 000 q:1 . 0 .... \
+ vm=%vm_dp vd=%vd_dp size=0
+ VDUP_scalar 1111 001 1 1 . 11 index:2 10 .... 11 000 q:1 . 0 .... \
+ vm=%vm_dp vd=%vd_dp size=1
+ VDUP_scalar 1111 001 1 1 . 11 index:1 100 .... 11 000 q:1 . 0 .... \
+ vm=%vm_dp vd=%vd_dp size=2
+ ]
+
+ # Subgroup for size != 0b11
+ [
+ ##################################################################
+ # 3-reg-different-length grouping:
+ # 1111 001 U 1 D sz!=11 Vn:4 Vd:4 opc:4 N 0 M 0 Vm:4
+ ##################################################################
+
+ &3diff vm vn vd size
+
+ @3diff .... ... . . . size:2 .... .... .... . . . . .... \
+ &3diff vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+ VADDL_S_3d 1111 001 0 1 . .. .... .... 0000 . 0 . 0 .... @3diff
+ VADDL_U_3d 1111 001 1 1 . .. .... .... 0000 . 0 . 0 .... @3diff
+
+ VADDW_S_3d 1111 001 0 1 . .. .... .... 0001 . 0 . 0 .... @3diff
+ VADDW_U_3d 1111 001 1 1 . .. .... .... 0001 . 0 . 0 .... @3diff
+
+ VSUBL_S_3d 1111 001 0 1 . .. .... .... 0010 . 0 . 0 .... @3diff
+ VSUBL_U_3d 1111 001 1 1 . .. .... .... 0010 . 0 . 0 .... @3diff
+
+ VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff
+ VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff
+
+ VADDHN_3d 1111 001 0 1 . .. .... .... 0100 . 0 . 0 .... @3diff
+ VRADDHN_3d 1111 001 1 1 . .. .... .... 0100 . 0 . 0 .... @3diff
+
+ VABAL_S_3d 1111 001 0 1 . .. .... .... 0101 . 0 . 0 .... @3diff
+ VABAL_U_3d 1111 001 1 1 . .. .... .... 0101 . 0 . 0 .... @3diff
+
+ VSUBHN_3d 1111 001 0 1 . .. .... .... 0110 . 0 . 0 .... @3diff
+ VRSUBHN_3d 1111 001 1 1 . .. .... .... 0110 . 0 . 0 .... @3diff
+
+ VABDL_S_3d 1111 001 0 1 . .. .... .... 0111 . 0 . 0 .... @3diff
+ VABDL_U_3d 1111 001 1 1 . .. .... .... 0111 . 0 . 0 .... @3diff
+
+ VMLAL_S_3d 1111 001 0 1 . .. .... .... 1000 . 0 . 0 .... @3diff
+ VMLAL_U_3d 1111 001 1 1 . .. .... .... 1000 . 0 . 0 .... @3diff
+
+ VQDMLAL_3d 1111 001 0 1 . .. .... .... 1001 . 0 . 0 .... @3diff
+
+ VMLSL_S_3d 1111 001 0 1 . .. .... .... 1010 . 0 . 0 .... @3diff
+ VMLSL_U_3d 1111 001 1 1 . .. .... .... 1010 . 0 . 0 .... @3diff
+
+ VQDMLSL_3d 1111 001 0 1 . .. .... .... 1011 . 0 . 0 .... @3diff
+
+ VMULL_S_3d 1111 001 0 1 . .. .... .... 1100 . 0 . 0 .... @3diff
+ VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff
+
+ VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff
+
+ VMULL_P_3d 1111 001 0 1 . .. .... .... 1110 . 0 . 0 .... @3diff
+
+ ##################################################################
+ # 2-regs-plus-scalar grouping:
+ # 1111 001 Q 1 D sz!=11 Vn:4 Vd:4 opc:4 N 1 M 0 Vm:4
+ ##################################################################
+ &2scalar vm vn vd size q
+
+ @2scalar .... ... q:1 . . size:2 .... .... .... . . . . .... \
+ &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
+ # For the 'long' ops the Q bit is part of insn decode
+ @2scalar_q0 .... ... . . . size:2 .... .... .... . . . . .... \
+ &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
+
+ VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
+ VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar
+
+ VMLAL_S_2sc 1111 001 0 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
+ VMLAL_U_2sc 1111 001 1 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
+
+ VQDMLAL_2sc 1111 001 0 1 . .. .... .... 0011 . 1 . 0 .... @2scalar_q0
+
+ VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
+ VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar
+
+ VMLSL_S_2sc 1111 001 0 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
+ VMLSL_U_2sc 1111 001 1 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
+
+ VQDMLSL_2sc 1111 001 0 1 . .. .... .... 0111 . 1 . 0 .... @2scalar_q0
+
+ VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
+ VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
+
+ VMULL_S_2sc 1111 001 0 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
+ VMULL_U_2sc 1111 001 1 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
+
+ VQDMULL_2sc 1111 001 0 1 . .. .... .... 1011 . 1 . 0 .... @2scalar_q0
+
+ VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
+ VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
+
+ VQRDMLAH_2sc 1111 001 . 1 . .. .... .... 1110 . 1 . 0 .... @2scalar
+ VQRDMLSH_2sc 1111 001 . 1 . .. .... .... 1111 . 1 . 0 .... @2scalar
+ ]
+}
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 664d361260..a5aa56bbde 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -1329,6 +1329,7 @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
neon_load_reg64(tmp, a->vm + pass);
fn(tmp, cpu_env, tmp, constimm);
neon_store_reg64(tmp, a->vd + pass);
+ tcg_temp_free_i64(tmp);
}
tcg_temp_free_i64(constimm);
return true;
@@ -1624,6 +1625,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
tmp = tcg_temp_new_i64();
widenfn(tmp, rm0);
+ tcg_temp_free_i32(rm0);
if (a->shift != 0) {
tcg_gen_shli_i64(tmp, tmp, a->shift);
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
@@ -1631,6 +1633,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
neon_store_reg64(tmp, a->vd);
widenfn(tmp, rm1);
+ tcg_temp_free_i32(rm1);
if (a->shift != 0) {
tcg_gen_shli_i64(tmp, tmp, a->shift);
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
@@ -1642,7 +1645,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
- NeonGenWidenFn *widenfn[] = {
+ static NeonGenWidenFn * const widenfn[] = {
gen_helper_neon_widen_s8,
gen_helper_neon_widen_s16,
tcg_gen_ext_i32_i64,
@@ -1652,7 +1655,7 @@ static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
- NeonGenWidenFn *widenfn[] = {
+ static NeonGenWidenFn * const widenfn[] = {
gen_helper_neon_widen_u8,
gen_helper_neon_widen_u16,
tcg_gen_extu_i32_i64,
@@ -1826,3 +1829,1144 @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
}
return do_1reg_imm(s, a, fn);
}
+
+static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
+ NeonGenWidenFn *widenfn,
+ NeonGenTwo64OpFn *opfn,
+ bool src1_wide)
+{
+ /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
+ TCGv_i64 rn0_64, rn1_64, rm_64;
+ TCGv_i32 rm;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!widenfn || !opfn) {
+ /* size == 3 case, which is an entirely different insn group */
+ return false;
+ }
+
+ if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rn0_64 = tcg_temp_new_i64();
+ rn1_64 = tcg_temp_new_i64();
+ rm_64 = tcg_temp_new_i64();
+
+ if (src1_wide) {
+ neon_load_reg64(rn0_64, a->vn);
+ } else {
+ TCGv_i32 tmp = neon_load_reg(a->vn, 0);
+ widenfn(rn0_64, tmp);
+ tcg_temp_free_i32(tmp);
+ }
+ rm = neon_load_reg(a->vm, 0);
+
+ widenfn(rm_64, rm);
+ tcg_temp_free_i32(rm);
+ opfn(rn0_64, rn0_64, rm_64);
+
+ /*
+ * Load second pass inputs before storing the first pass result, to
+ * avoid incorrect results if a narrow input overlaps with the result.
+ */
+ if (src1_wide) {
+ neon_load_reg64(rn1_64, a->vn + 1);
+ } else {
+ TCGv_i32 tmp = neon_load_reg(a->vn, 1);
+ widenfn(rn1_64, tmp);
+ tcg_temp_free_i32(tmp);
+ }
+ rm = neon_load_reg(a->vm, 1);
+
+ neon_store_reg64(rn0_64, a->vd);
+
+ widenfn(rm_64, rm);
+ tcg_temp_free_i32(rm);
+ opfn(rn1_64, rn1_64, rm_64);
+ neon_store_reg64(rn1_64, a->vd + 1);
+
+ tcg_temp_free_i64(rn0_64);
+ tcg_temp_free_i64(rn1_64);
+ tcg_temp_free_i64(rm_64);
+
+ return true;
+}
+
+#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
+ { \
+ static NeonGenWidenFn * const widenfn[] = { \
+ gen_helper_neon_widen_##S##8, \
+ gen_helper_neon_widen_##S##16, \
+ tcg_gen_##EXT##_i32_i64, \
+ NULL, \
+ }; \
+ static NeonGenTwo64OpFn * const addfn[] = { \
+ gen_helper_neon_##OP##l_u16, \
+ gen_helper_neon_##OP##l_u32, \
+ tcg_gen_##OP##_i64, \
+ NULL, \
+ }; \
+ return do_prewiden_3d(s, a, widenfn[a->size], \
+ addfn[a->size], SRC1WIDE); \
+ }
+
+DO_PREWIDEN(VADDL_S, s, ext, add, false)
+DO_PREWIDEN(VADDL_U, u, extu, add, false)
+DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
+DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
+DO_PREWIDEN(VADDW_S, s, ext, add, true)
+DO_PREWIDEN(VADDW_U, u, extu, add, true)
+DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
+DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
+
+static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
+ NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
+{
+ /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
+ TCGv_i64 rn_64, rm_64;
+ TCGv_i32 rd0, rd1;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn || !narrowfn) {
+ /* size == 3 case, which is an entirely different insn group */
+ return false;
+ }
+
+ if ((a->vn | a->vm) & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rn_64 = tcg_temp_new_i64();
+ rm_64 = tcg_temp_new_i64();
+ rd0 = tcg_temp_new_i32();
+ rd1 = tcg_temp_new_i32();
+
+ neon_load_reg64(rn_64, a->vn);
+ neon_load_reg64(rm_64, a->vm);
+
+ opfn(rn_64, rn_64, rm_64);
+
+ narrowfn(rd0, rn_64);
+
+ neon_load_reg64(rn_64, a->vn + 1);
+ neon_load_reg64(rm_64, a->vm + 1);
+
+ opfn(rn_64, rn_64, rm_64);
+
+ narrowfn(rd1, rn_64);
+
+ neon_store_reg(a->vd, 0, rd0);
+ neon_store_reg(a->vd, 1, rd1);
+
+ tcg_temp_free_i64(rn_64);
+ tcg_temp_free_i64(rm_64);
+
+ return true;
+}
+
+#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
+ { \
+ static NeonGenTwo64OpFn * const addfn[] = { \
+ gen_helper_neon_##OP##l_u16, \
+ gen_helper_neon_##OP##l_u32, \
+ tcg_gen_##OP##_i64, \
+ NULL, \
+ }; \
+ static NeonGenNarrowFn * const narrowfn[] = { \
+ gen_helper_neon_##NARROWTYPE##_high_u8, \
+ gen_helper_neon_##NARROWTYPE##_high_u16, \
+ EXTOP, \
+ NULL, \
+ }; \
+ return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
+ }
+
+static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
+{
+ tcg_gen_addi_i64(rn, rn, 1u << 31);
+ tcg_gen_extrh_i64_i32(rd, rn);
+}
+
+DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
+DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
+DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
+DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
+
+static bool do_long_3d(DisasContext *s, arg_3diff *a,
+ NeonGenTwoOpWidenFn *opfn,
+ NeonGenTwo64OpFn *accfn)
+{
+ /*
+ * 3-regs different lengths, long operations.
+ * These perform an operation on two inputs that returns a double-width
+ * result, and then possibly perform an accumulation operation of
+ * that result into the double-width destination.
+ */
+ TCGv_i64 rd0, rd1, tmp;
+ TCGv_i32 rn, rm;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* size == 3 case, which is an entirely different insn group */
+ return false;
+ }
+
+ if (a->vd & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rd0 = tcg_temp_new_i64();
+ rd1 = tcg_temp_new_i64();
+
+ rn = neon_load_reg(a->vn, 0);
+ rm = neon_load_reg(a->vm, 0);
+ opfn(rd0, rn, rm);
+ tcg_temp_free_i32(rn);
+ tcg_temp_free_i32(rm);
+
+ rn = neon_load_reg(a->vn, 1);
+ rm = neon_load_reg(a->vm, 1);
+ opfn(rd1, rn, rm);
+ tcg_temp_free_i32(rn);
+ tcg_temp_free_i32(rm);
+
+ /* Don't store results until after all loads: they might overlap */
+ if (accfn) {
+ tmp = tcg_temp_new_i64();
+ neon_load_reg64(tmp, a->vd);
+ accfn(tmp, tmp, rd0);
+ neon_store_reg64(tmp, a->vd);
+ neon_load_reg64(tmp, a->vd + 1);
+ accfn(tmp, tmp, rd1);
+ neon_store_reg64(tmp, a->vd + 1);
+ tcg_temp_free_i64(tmp);
+ } else {
+ neon_store_reg64(rd0, a->vd);
+ neon_store_reg64(rd1, a->vd + 1);
+ }
+
+ tcg_temp_free_i64(rd0);
+ tcg_temp_free_i64(rd1);
+
+ return true;
+}
+
+static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_abdl_s16,
+ gen_helper_neon_abdl_s32,
+ gen_helper_neon_abdl_s64,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_abdl_u16,
+ gen_helper_neon_abdl_u32,
+ gen_helper_neon_abdl_u64,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_abdl_s16,
+ gen_helper_neon_abdl_s32,
+ gen_helper_neon_abdl_s64,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const addfn[] = {
+ gen_helper_neon_addl_u16,
+ gen_helper_neon_addl_u32,
+ tcg_gen_add_i64,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
+}
+
+static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_abdl_u16,
+ gen_helper_neon_abdl_u32,
+ gen_helper_neon_abdl_u64,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const addfn[] = {
+ gen_helper_neon_addl_u16,
+ gen_helper_neon_addl_u32,
+ tcg_gen_add_i64,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
+}
+
+static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+ TCGv_i32 lo = tcg_temp_new_i32();
+ TCGv_i32 hi = tcg_temp_new_i32();
+
+ tcg_gen_muls2_i32(lo, hi, rn, rm);
+ tcg_gen_concat_i32_i64(rd, lo, hi);
+
+ tcg_temp_free_i32(lo);
+ tcg_temp_free_i32(hi);
+}
+
+static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+ TCGv_i32 lo = tcg_temp_new_i32();
+ TCGv_i32 hi = tcg_temp_new_i32();
+
+ tcg_gen_mulu2_i32(lo, hi, rn, rm);
+ tcg_gen_concat_i32_i64(rd, lo, hi);
+
+ tcg_temp_free_i32(lo);
+ tcg_temp_free_i32(hi);
+}
+
+static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_mull_s8,
+ gen_helper_neon_mull_s16,
+ gen_mull_s32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_mull_u8,
+ gen_helper_neon_mull_u16,
+ gen_mull_u32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+#define DO_VMLAL(INSN,MULL,ACC) \
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
+ { \
+ static NeonGenTwoOpWidenFn * const opfn[] = { \
+ gen_helper_neon_##MULL##8, \
+ gen_helper_neon_##MULL##16, \
+ gen_##MULL##32, \
+ NULL, \
+ }; \
+ static NeonGenTwo64OpFn * const accfn[] = { \
+ gen_helper_neon_##ACC##l_u16, \
+ gen_helper_neon_##ACC##l_u32, \
+ tcg_gen_##ACC##_i64, \
+ NULL, \
+ }; \
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \
+ }
+
+DO_VMLAL(VMLAL_S,mull_s,add)
+DO_VMLAL(VMLAL_U,mull_u,add)
+DO_VMLAL(VMLSL_S,mull_s,sub)
+DO_VMLAL(VMLSL_U,mull_u,sub)
+
+static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+ gen_helper_neon_mull_s16(rd, rn, rm);
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
+}
+
+static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+ gen_mull_s32(rd, rn, rm);
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
+}
+
+static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
+}
+
+static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
+}
+
+static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ NULL,
+ gen_VQDMLAL_acc_16,
+ gen_VQDMLAL_acc_32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ gen_helper_neon_negl_u32(rm, rm);
+ gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
+}
+
+static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ tcg_gen_neg_i64(rm, rm);
+ gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
+}
+
+static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ NULL,
+ gen_VQDMLSL_acc_16,
+ gen_VQDMLSL_acc_32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
+{
+ gen_helper_gvec_3 *fn_gvec;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->vd & 1) {
+ return false;
+ }
+
+ switch (a->size) {
+ case 0:
+ fn_gvec = gen_helper_neon_pmull_h;
+ break;
+ case 2:
+ if (!dc_isar_feature(aa32_pmull, s)) {
+ return false;
+ }
+ fn_gvec = gen_helper_gvec_pmull_q;
+ break;
+ default:
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
+ neon_reg_offset(a->vn, 0),
+ neon_reg_offset(a->vm, 0),
+ 16, 16, 0, fn_gvec);
+ return true;
+}
+
+static void gen_neon_dup_low16(TCGv_i32 var)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_ext16u_i32(var, var);
+ tcg_gen_shli_i32(tmp, var, 16);
+ tcg_gen_or_i32(var, var, tmp);
+ tcg_temp_free_i32(tmp);
+}
+
+static void gen_neon_dup_high16(TCGv_i32 var)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_andi_i32(var, var, 0xffff0000);
+ tcg_gen_shri_i32(tmp, var, 16);
+ tcg_gen_or_i32(var, var, tmp);
+ tcg_temp_free_i32(tmp);
+}
+
+static inline TCGv_i32 neon_get_scalar(int size, int reg)
+{
+ TCGv_i32 tmp;
+ if (size == 1) {
+ tmp = neon_load_reg(reg & 7, reg >> 4);
+ if (reg & 8) {
+ gen_neon_dup_high16(tmp);
+ } else {
+ gen_neon_dup_low16(tmp);
+ }
+ } else {
+ tmp = neon_load_reg(reg & 15, reg >> 4);
+ }
+ return tmp;
+}
+
+static bool do_2scalar(DisasContext *s, arg_2scalar *a,
+ NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
+{
+ /*
+ * Two registers and a scalar: perform an operation between
+ * the input elements and the scalar, and then possibly
+ * perform an accumulation operation of that result into the
+ * destination.
+ */
+ TCGv_i32 scalar;
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* Bad size (including size == 3, which is a different insn group) */
+ return false;
+ }
+
+ if (a->q && ((a->vd | a->vn) & 1)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ scalar = neon_get_scalar(a->size, a->vm);
+
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+ TCGv_i32 tmp = neon_load_reg(a->vn, pass);
+ opfn(tmp, tmp, scalar);
+ if (accfn) {
+ TCGv_i32 rd = neon_load_reg(a->vd, pass);
+ accfn(tmp, rd, tmp);
+ tcg_temp_free_i32(rd);
+ }
+ neon_store_reg(a->vd, pass, tmp);
+ }
+ tcg_temp_free_i32(scalar);
+ return true;
+}
+
+static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mul_u16,
+ tcg_gen_mul_i32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mul_u16,
+ tcg_gen_mul_i32,
+ NULL,
+ };
+ static NeonGenTwoOpFn * const accfn[] = {
+ NULL,
+ gen_helper_neon_add_u16,
+ tcg_gen_add_i32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mul_u16,
+ tcg_gen_mul_i32,
+ NULL,
+ };
+ static NeonGenTwoOpFn * const accfn[] = {
+ NULL,
+ gen_helper_neon_sub_u16,
+ tcg_gen_sub_i32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
+
+/*
+ * Rather than have a float-specific version of do_2scalar just for
+ * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into
+ * a NeonGenTwoOpFn.
+ */
+#define WRAP_FP_FN(WRAPNAME, FUNC) \
+ static void WRAPNAME(TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \
+ { \
+ TCGv_ptr fpstatus = get_fpstatus_ptr(1); \
+ FUNC(rd, rn, rm, fpstatus); \
+ tcg_temp_free_ptr(fpstatus); \
+ }
+
+WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls)
+WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds)
+WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs)
+
+static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ NULL, /* TODO: fp16 support */
+ gen_VMUL_F_mul,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ NULL, /* TODO: fp16 support */
+ gen_VMUL_F_mul,
+ NULL,
+ };
+ static NeonGenTwoOpFn * const accfn[] = {
+ NULL,
+ NULL, /* TODO: fp16 support */
+ gen_VMUL_F_add,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ NULL, /* TODO: fp16 support */
+ gen_VMUL_F_mul,
+ NULL,
+ };
+ static NeonGenTwoOpFn * const accfn[] = {
+ NULL,
+ NULL, /* TODO: fp16 support */
+ gen_VMUL_F_sub,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
+
+WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
+WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
+WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
+WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
+
+static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_VQDMULH_16,
+ gen_VQDMULH_32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_VQRDMULH_16,
+ gen_VQRDMULH_32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
+ NeonGenThreeOpEnvFn *opfn)
+{
+ /*
+ * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
+ * performs a kind of fused op-then-accumulate using a helper
+ * function that takes all of rd, rn and the scalar at once.
+ */
+ TCGv_i32 scalar;
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_rdm, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* Bad size (including size == 3, which is a different insn group) */
+ return false;
+ }
+
+ if (a->q && ((a->vd | a->vn) & 1)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ scalar = neon_get_scalar(a->size, a->vm);
+
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+ TCGv_i32 rn = neon_load_reg(a->vn, pass);
+ TCGv_i32 rd = neon_load_reg(a->vd, pass);
+ opfn(rd, cpu_env, rn, scalar, rd);
+ tcg_temp_free_i32(rn);
+ neon_store_reg(a->vd, pass, rd);
+ }
+ tcg_temp_free_i32(scalar);
+
+ return true;
+}
+
+static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenThreeOpEnvFn *opfn[] = {
+ NULL,
+ gen_helper_neon_qrdmlah_s16,
+ gen_helper_neon_qrdmlah_s32,
+ NULL,
+ };
+ return do_vqrdmlah_2sc(s, a, opfn[a->size]);
+}
+
+static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenThreeOpEnvFn *opfn[] = {
+ NULL,
+ gen_helper_neon_qrdmlsh_s16,
+ gen_helper_neon_qrdmlsh_s32,
+ NULL,
+ };
+ return do_vqrdmlah_2sc(s, a, opfn[a->size]);
+}
+
+static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
+ NeonGenTwoOpWidenFn *opfn,
+ NeonGenTwo64OpFn *accfn)
+{
+ /*
+ * Two registers and a scalar, long operations: perform an
+ * operation on the input elements and the scalar which produces
+ * a double-width result, and then possibly perform an accumulation
+ * operation of that result into the destination.
+ */
+ TCGv_i32 scalar, rn;
+ TCGv_i64 rn0_64, rn1_64;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* Bad size (including size == 3, which is a different insn group) */
+ return false;
+ }
+
+ if (a->vd & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ scalar = neon_get_scalar(a->size, a->vm);
+
+ /* Load all inputs before writing any outputs, in case of overlap */
+ rn = neon_load_reg(a->vn, 0);
+ rn0_64 = tcg_temp_new_i64();
+ opfn(rn0_64, rn, scalar);
+ tcg_temp_free_i32(rn);
+
+ rn = neon_load_reg(a->vn, 1);
+ rn1_64 = tcg_temp_new_i64();
+ opfn(rn1_64, rn, scalar);
+ tcg_temp_free_i32(rn);
+ tcg_temp_free_i32(scalar);
+
+ if (accfn) {
+ TCGv_i64 t64 = tcg_temp_new_i64();
+ neon_load_reg64(t64, a->vd);
+ accfn(t64, t64, rn0_64);
+ neon_store_reg64(t64, a->vd);
+ neon_load_reg64(t64, a->vd + 1);
+ accfn(t64, t64, rn1_64);
+ neon_store_reg64(t64, a->vd + 1);
+ tcg_temp_free_i64(t64);
+ } else {
+ neon_store_reg64(rn0_64, a->vd);
+ neon_store_reg64(rn1_64, a->vd + 1);
+ }
+ tcg_temp_free_i64(rn0_64);
+ tcg_temp_free_i64(rn1_64);
+ return true;
+}
+
+static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mull_s16,
+ gen_mull_s32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mull_u16,
+ gen_mull_u32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
+}
+
+#define DO_VMLAL_2SC(INSN, MULL, ACC) \
+ static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \
+ { \
+ static NeonGenTwoOpWidenFn * const opfn[] = { \
+ NULL, \
+ gen_helper_neon_##MULL##16, \
+ gen_##MULL##32, \
+ NULL, \
+ }; \
+ static NeonGenTwo64OpFn * const accfn[] = { \
+ NULL, \
+ gen_helper_neon_##ACC##l_u32, \
+ tcg_gen_##ACC##_i64, \
+ NULL, \
+ }; \
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \
+ }
+
+DO_VMLAL_2SC(VMLAL_S, mull_s, add)
+DO_VMLAL_2SC(VMLAL_U, mull_u, add)
+DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
+DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
+
+static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ NULL,
+ gen_VQDMLAL_acc_16,
+ gen_VQDMLAL_acc_32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ NULL,
+ gen_VQDMLSL_acc_16,
+ gen_VQDMLSL_acc_32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (a->imm > 7 && !a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (!a->q) {
+ /* Extract 64 bits from <Vm:Vn> */
+ TCGv_i64 left, right, dest;
+
+ left = tcg_temp_new_i64();
+ right = tcg_temp_new_i64();
+ dest = tcg_temp_new_i64();
+
+ neon_load_reg64(right, a->vn);
+ neon_load_reg64(left, a->vm);
+ tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
+ neon_store_reg64(dest, a->vd);
+
+ tcg_temp_free_i64(left);
+ tcg_temp_free_i64(right);
+ tcg_temp_free_i64(dest);
+ } else {
+ /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
+ TCGv_i64 left, middle, right, destleft, destright;
+
+ left = tcg_temp_new_i64();
+ middle = tcg_temp_new_i64();
+ right = tcg_temp_new_i64();
+ destleft = tcg_temp_new_i64();
+ destright = tcg_temp_new_i64();
+
+ if (a->imm < 8) {
+ neon_load_reg64(right, a->vn);
+ neon_load_reg64(middle, a->vn + 1);
+ tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
+ neon_load_reg64(left, a->vm);
+ tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
+ } else {
+ neon_load_reg64(right, a->vn + 1);
+ neon_load_reg64(middle, a->vm);
+ tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
+ neon_load_reg64(left, a->vm + 1);
+ tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
+ }
+
+ neon_store_reg64(destright, a->vd);
+ neon_store_reg64(destleft, a->vd + 1);
+
+ tcg_temp_free_i64(destright);
+ tcg_temp_free_i64(destleft);
+ tcg_temp_free_i64(right);
+ tcg_temp_free_i64(middle);
+ tcg_temp_free_i64(left);
+ }
+ return true;
+}
+
+static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
+{
+ int n;
+ TCGv_i32 tmp, tmp2, tmp3, tmp4;
+ TCGv_ptr ptr1;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ n = a->len + 1;
+ if ((a->vn + n) > 32) {
+ /*
+ * This is UNPREDICTABLE; we choose to UNDEF to avoid the
+ * helper function running off the end of the register file.
+ */
+ return false;
+ }
+ n <<= 3;
+ if (a->op) {
+ tmp = neon_load_reg(a->vd, 0);
+ } else {
+ tmp = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp, 0);
+ }
+ tmp2 = neon_load_reg(a->vm, 0);
+ ptr1 = vfp_reg_ptr(true, a->vn);
+ tmp4 = tcg_const_i32(n);
+ gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
+ tcg_temp_free_i32(tmp);
+ if (a->op) {
+ tmp = neon_load_reg(a->vd, 1);
+ } else {
+ tmp = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp, 0);
+ }
+ tmp3 = neon_load_reg(a->vm, 1);
+ gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
+ tcg_temp_free_i32(tmp4);
+ tcg_temp_free_ptr(ptr1);
+ neon_store_reg(a->vd, 0, tmp2);
+ neon_store_reg(a->vd, 1, tmp3);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
+static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->vd & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
+ neon_element_offset(a->vm, a->index, a->size),
+ a->q ? 16 : 8, a->q ? 16 : 8);
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index bcdfec34d2..6d18892ade 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -377,43 +377,6 @@ static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
tcg_gen_ext16s_i32(dest, var);
}
-/* 32x32->64 multiply. Marks inputs as dead. */
-static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 lo = tcg_temp_new_i32();
- TCGv_i32 hi = tcg_temp_new_i32();
- TCGv_i64 ret;
-
- tcg_gen_mulu2_i32(lo, hi, a, b);
- tcg_temp_free_i32(a);
- tcg_temp_free_i32(b);
-
- ret = tcg_temp_new_i64();
- tcg_gen_concat_i32_i64(ret, lo, hi);
- tcg_temp_free_i32(lo);
- tcg_temp_free_i32(hi);
-
- return ret;
-}
-
-static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 lo = tcg_temp_new_i32();
- TCGv_i32 hi = tcg_temp_new_i32();
- TCGv_i64 ret;
-
- tcg_gen_muls2_i32(lo, hi, a, b);
- tcg_temp_free_i32(a);
- tcg_temp_free_i32(b);
-
- ret = tcg_temp_new_i64();
- tcg_gen_concat_i32_i64(ret, lo, hi);
- tcg_temp_free_i32(lo);
- tcg_temp_free_i32(hi);
-
- return ret;
-}
-
/* Swap low and high halfwords. */
static void gen_swap_half(TCGv_i32 var)
{
@@ -2624,24 +2587,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
-static void gen_neon_dup_low16(TCGv_i32 var)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ext16u_i32(var, var);
- tcg_gen_shli_i32(tmp, var, 16);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(tmp);
-}
-
-static void gen_neon_dup_high16(TCGv_i32 var)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_andi_i32(var, var, 0xffff0000);
- tcg_gen_shri_i32(tmp, var, 16);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(tmp);
-}
-
static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
{
#ifndef CONFIG_USER_ONLY
@@ -2991,55 +2936,6 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
-static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
-{
- switch (size) {
- case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
- case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
- case 2: tcg_gen_add_i32(t0, t0, t1); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
-{
- switch (size) {
- case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
- case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
- case 2: tcg_gen_sub_i32(t0, t1, t0); break;
- default: return;
- }
-}
-
-static TCGv_i32 neon_load_scratch(int scratch)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
- return tmp;
-}
-
-static void neon_store_scratch(int scratch, TCGv_i32 var)
-{
- tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
- tcg_temp_free_i32(var);
-}
-
-static inline TCGv_i32 neon_get_scalar(int size, int reg)
-{
- TCGv_i32 tmp;
- if (size == 1) {
- tmp = neon_load_reg(reg & 7, reg >> 4);
- if (reg & 8) {
- gen_neon_dup_high16(tmp);
- } else {
- gen_neon_dup_low16(tmp);
- }
- } else {
- tmp = neon_load_reg(reg & 15, reg >> 4);
- }
- return tmp;
-}
-
static int gen_neon_unzip(int rd, int rm, int size, int q)
{
TCGv_ptr pd, pm;
@@ -3231,68 +3127,6 @@ static inline void gen_neon_addl(int size)
}
}
-static inline void gen_neon_subl(int size)
-{
- switch (size) {
- case 0: gen_helper_neon_subl_u16(CPU_V001); break;
- case 1: gen_helper_neon_subl_u32(CPU_V001); break;
- case 2: tcg_gen_sub_i64(CPU_V001); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_negl(TCGv_i64 var, int size)
-{
- switch (size) {
- case 0: gen_helper_neon_negl_u16(var, var); break;
- case 1: gen_helper_neon_negl_u32(var, var); break;
- case 2:
- tcg_gen_neg_i64(var, var);
- break;
- default: abort();
- }
-}
-
-static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
-{
- switch (size) {
- case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
- case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
- int size, int u)
-{
- TCGv_i64 tmp;
-
- switch ((size << 1) | u) {
- case 0: gen_helper_neon_mull_s8(dest, a, b); break;
- case 1: gen_helper_neon_mull_u8(dest, a, b); break;
- case 2: gen_helper_neon_mull_s16(dest, a, b); break;
- case 3: gen_helper_neon_mull_u16(dest, a, b); break;
- case 4:
- tmp = gen_muls_i64_i32(a, b);
- tcg_gen_mov_i64(dest, tmp);
- tcg_temp_free_i64(tmp);
- break;
- case 5:
- tmp = gen_mulu_i64_i32(a, b);
- tcg_gen_mov_i64(dest, tmp);
- tcg_temp_free_i64(tmp);
- break;
- default: abort();
- }
-
- /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
- Don't forget to clean them now. */
- if (size < 2) {
- tcg_temp_free_i32(a);
- tcg_temp_free_i32(b);
- }
-}
-
static void gen_neon_narrow_op(int op, int u, int size,
TCGv_i32 dest, TCGv_i64 src)
{
@@ -5191,15 +5025,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
{
int op;
int q;
- int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
+ int rd, rm, rd_ofs, rm_ofs;
int size;
int pass;
int u;
int vec_size;
- uint32_t imm;
- TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
- TCGv_ptr ptr1;
- TCGv_i64 tmp64;
+ TCGv_i32 tmp, tmp2, tmp3;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return 1;
@@ -5220,12 +5051,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
q = (insn & (1 << 6)) != 0;
u = (insn >> 24) & 1;
VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
VFP_DREG_M(rm, insn);
size = (insn >> 20) & 3;
vec_size = q ? 16 : 8;
rd_ofs = neon_reg_offset(rd, 0);
- rn_ofs = neon_reg_offset(rn, 0);
rm_ofs = neon_reg_offset(rm, 0);
if ((insn & (1 << 23)) == 0) {
@@ -5236,454 +5065,15 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
return 1;
} else { /* (insn & 0x00800010 == 0x00800000) */
if (size != 3) {
- op = (insn >> 8) & 0xf;
- if ((insn & (1 << 6)) == 0) {
- /* Three registers of different lengths. */
- int src1_wide;
- int src2_wide;
- int prewiden;
- /* undefreq: bit 0 : UNDEF if size == 0
- * bit 1 : UNDEF if size == 1
- * bit 2 : UNDEF if size == 2
- * bit 3 : UNDEF if U == 1
- * Note that [2:0] set implies 'always UNDEF'
- */
- int undefreq;
- /* prewiden, src1_wide, src2_wide, undefreq */
- static const int neon_3reg_wide[16][4] = {
- {1, 0, 0, 0}, /* VADDL */
- {1, 1, 0, 0}, /* VADDW */
- {1, 0, 0, 0}, /* VSUBL */
- {1, 1, 0, 0}, /* VSUBW */
- {0, 1, 1, 0}, /* VADDHN */
- {0, 0, 0, 0}, /* VABAL */
- {0, 1, 1, 0}, /* VSUBHN */
- {0, 0, 0, 0}, /* VABDL */
- {0, 0, 0, 0}, /* VMLAL */
- {0, 0, 0, 9}, /* VQDMLAL */
- {0, 0, 0, 0}, /* VMLSL */
- {0, 0, 0, 9}, /* VQDMLSL */
- {0, 0, 0, 0}, /* Integer VMULL */
- {0, 0, 0, 9}, /* VQDMULL */
- {0, 0, 0, 0xa}, /* Polynomial VMULL */
- {0, 0, 0, 7}, /* Reserved: always UNDEF */
- };
-
- prewiden = neon_3reg_wide[op][0];
- src1_wide = neon_3reg_wide[op][1];
- src2_wide = neon_3reg_wide[op][2];
- undefreq = neon_3reg_wide[op][3];
-
- if ((undefreq & (1 << size)) ||
- ((undefreq & 8) && u)) {
- return 1;
- }
- if ((src1_wide && (rn & 1)) ||
- (src2_wide && (rm & 1)) ||
- (!src2_wide && (rd & 1))) {
- return 1;
- }
-
- /* Handle polynomial VMULL in a single pass. */
- if (op == 14) {
- if (size == 0) {
- /* VMULL.P8 */
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
- 0, gen_helper_neon_pmull_h);
- } else {
- /* VMULL.P64 */
- if (!dc_isar_feature(aa32_pmull, s)) {
- return 1;
- }
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
- 0, gen_helper_gvec_pmull_q);
- }
- return 0;
- }
-
- /* Avoid overlapping operands. Wide source operands are
- always aligned so will never overlap with wide
- destinations in problematic ways. */
- if (rd == rm && !src2_wide) {
- tmp = neon_load_reg(rm, 1);
- neon_store_scratch(2, tmp);
- } else if (rd == rn && !src1_wide) {
- tmp = neon_load_reg(rn, 1);
- neon_store_scratch(2, tmp);
- }
- tmp3 = NULL;
- for (pass = 0; pass < 2; pass++) {
- if (src1_wide) {
- neon_load_reg64(cpu_V0, rn + pass);
- tmp = NULL;
- } else {
- if (pass == 1 && rd == rn) {
- tmp = neon_load_scratch(2);
- } else {
- tmp = neon_load_reg(rn, pass);
- }
- if (prewiden) {
- gen_neon_widen(cpu_V0, tmp, size, u);
- }
- }
- if (src2_wide) {
- neon_load_reg64(cpu_V1, rm + pass);
- tmp2 = NULL;
- } else {
- if (pass == 1 && rd == rm) {
- tmp2 = neon_load_scratch(2);
- } else {
- tmp2 = neon_load_reg(rm, pass);
- }
- if (prewiden) {
- gen_neon_widen(cpu_V1, tmp2, size, u);
- }
- }
- switch (op) {
- case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
- gen_neon_addl(size);
- break;
- case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
- gen_neon_subl(size);
- break;
- case 5: case 7: /* VABAL, VABDL */
- switch ((size << 1) | u) {
- case 0:
- gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
- break;
- case 1:
- gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
- break;
- case 3:
- gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
- break;
- case 4:
- gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
- break;
- case 5:
- gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
- break;
- default: abort();
- }
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- break;
- case 8: case 9: case 10: case 11: case 12: case 13:
- /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
- break;
- default: /* 15 is RESERVED: caught earlier */
- abort();
- }
- if (op == 13) {
- /* VQDMULL */
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- neon_store_reg64(cpu_V0, rd + pass);
- } else if (op == 5 || (op >= 8 && op <= 11)) {
- /* Accumulate. */
- neon_load_reg64(cpu_V1, rd + pass);
- switch (op) {
- case 10: /* VMLSL */
- gen_neon_negl(cpu_V0, size);
- /* Fall through */
- case 5: case 8: /* VABAL, VMLAL */
- gen_neon_addl(size);
- break;
- case 9: case 11: /* VQDMLAL, VQDMLSL */
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- if (op == 11) {
- gen_neon_negl(cpu_V0, size);
- }
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
- break;
- default:
- abort();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- } else if (op == 4 || op == 6) {
- /* Narrowing operation. */
- tmp = tcg_temp_new_i32();
- if (!u) {
- switch (size) {
- case 0:
- gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
- break;
- case 1:
- gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
- break;
- case 2:
- tcg_gen_extrh_i64_i32(tmp, cpu_V0);
- break;
- default: abort();
- }
- } else {
- switch (size) {
- case 0:
- gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
- break;
- case 1:
- gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
- break;
- case 2:
- tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
- tcg_gen_extrh_i64_i32(tmp, cpu_V0);
- break;
- default: abort();
- }
- }
- if (pass == 0) {
- tmp3 = tmp;
- } else {
- neon_store_reg(rd, 0, tmp3);
- neon_store_reg(rd, 1, tmp);
- }
- } else {
- /* Write back the result. */
- neon_store_reg64(cpu_V0, rd + pass);
- }
- }
- } else {
- /* Two registers and a scalar. NB that for ops of this form
- * the ARM ARM labels bit 24 as Q, but it is in our variable
- * 'u', not 'q'.
- */
- if (size == 0) {
- return 1;
- }
- switch (op) {
- case 1: /* Float VMLA scalar */
- case 5: /* Floating point VMLS scalar */
- case 9: /* Floating point VMUL scalar */
- if (size == 1) {
- return 1;
- }
- /* fall through */
- case 0: /* Integer VMLA scalar */
- case 4: /* Integer VMLS scalar */
- case 8: /* Integer VMUL scalar */
- case 12: /* VQDMULH scalar */
- case 13: /* VQRDMULH scalar */
- if (u && ((rd | rn) & 1)) {
- return 1;
- }
- tmp = neon_get_scalar(size, rm);
- neon_store_scratch(0, tmp);
- for (pass = 0; pass < (u ? 4 : 2); pass++) {
- tmp = neon_load_scratch(0);
- tmp2 = neon_load_reg(rn, pass);
- if (op == 12) {
- if (size == 1) {
- gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
- } else {
- gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
- }
- } else if (op == 13) {
- if (size == 1) {
- gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
- } else {
- gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
- }
- } else if (op & 1) {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- } else {
- switch (size) {
- case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- }
- tcg_temp_free_i32(tmp2);
- if (op < 8) {
- /* Accumulate. */
- tmp2 = neon_load_reg(rd, pass);
- switch (op) {
- case 0:
- gen_neon_add(size, tmp, tmp2);
- break;
- case 1:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case 4:
- gen_neon_rsb(size, tmp, tmp2);
- break;
- case 5:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- default:
- abort();
- }
- tcg_temp_free_i32(tmp2);
- }
- neon_store_reg(rd, pass, tmp);
- }
- break;
- case 3: /* VQDMLAL scalar */
- case 7: /* VQDMLSL scalar */
- case 11: /* VQDMULL scalar */
- if (u == 1) {
- return 1;
- }
- /* fall through */
- case 2: /* VMLAL sclar */
- case 6: /* VMLSL scalar */
- case 10: /* VMULL scalar */
- if (rd & 1) {
- return 1;
- }
- tmp2 = neon_get_scalar(size, rm);
- /* We need a copy of tmp2 because gen_neon_mull
- * deletes it during pass 0. */
- tmp4 = tcg_temp_new_i32();
- tcg_gen_mov_i32(tmp4, tmp2);
- tmp3 = neon_load_reg(rn, 1);
-
- for (pass = 0; pass < 2; pass++) {
- if (pass == 0) {
- tmp = neon_load_reg(rn, 0);
- } else {
- tmp = tmp3;
- tmp2 = tmp4;
- }
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
- if (op != 11) {
- neon_load_reg64(cpu_V1, rd + pass);
- }
- switch (op) {
- case 6:
- gen_neon_negl(cpu_V0, size);
- /* Fall through */
- case 2:
- gen_neon_addl(size);
- break;
- case 3: case 7:
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- if (op == 7) {
- gen_neon_negl(cpu_V0, size);
- }
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
- break;
- case 10:
- /* no-op */
- break;
- case 11:
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- break;
- default:
- abort();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- break;
- case 14: /* VQRDMLAH scalar */
- case 15: /* VQRDMLSH scalar */
- {
- NeonGenThreeOpEnvFn *fn;
-
- if (!dc_isar_feature(aa32_rdm, s)) {
- return 1;
- }
- if (u && ((rd | rn) & 1)) {
- return 1;
- }
- if (op == 14) {
- if (size == 1) {
- fn = gen_helper_neon_qrdmlah_s16;
- } else {
- fn = gen_helper_neon_qrdmlah_s32;
- }
- } else {
- if (size == 1) {
- fn = gen_helper_neon_qrdmlsh_s16;
- } else {
- fn = gen_helper_neon_qrdmlsh_s32;
- }
- }
-
- tmp2 = neon_get_scalar(size, rm);
- for (pass = 0; pass < (u ? 4 : 2); pass++) {
- tmp = neon_load_reg(rn, pass);
- tmp3 = neon_load_reg(rd, pass);
- fn(tmp, cpu_env, tmp, tmp2, tmp3);
- tcg_temp_free_i32(tmp3);
- neon_store_reg(rd, pass, tmp);
- }
- tcg_temp_free_i32(tmp2);
- }
- break;
- default:
- g_assert_not_reached();
- }
- }
+ /*
+ * Three registers of different lengths, or two registers and
+ * a scalar: handled by decodetree
+ */
+ return 1;
} else { /* size == 3 */
if (!u) {
- /* Extract. */
- imm = (insn >> 8) & 0xf;
-
- if (imm > 7 && !q)
- return 1;
-
- if (q && ((rd | rn | rm) & 1)) {
- return 1;
- }
-
- if (imm == 0) {
- neon_load_reg64(cpu_V0, rn);
- if (q) {
- neon_load_reg64(cpu_V1, rn + 1);
- }
- } else if (imm == 8) {
- neon_load_reg64(cpu_V0, rn + 1);
- if (q) {
- neon_load_reg64(cpu_V1, rm);
- }
- } else if (q) {
- tmp64 = tcg_temp_new_i64();
- if (imm < 8) {
- neon_load_reg64(cpu_V0, rn);
- neon_load_reg64(tmp64, rn + 1);
- } else {
- neon_load_reg64(cpu_V0, rn + 1);
- neon_load_reg64(tmp64, rm);
- }
- tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
- tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- if (imm < 8) {
- neon_load_reg64(cpu_V1, rm);
- } else {
- neon_load_reg64(cpu_V1, rm + 1);
- imm -= 8;
- }
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
- tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
- tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
- tcg_temp_free_i64(tmp64);
- } else {
- /* BUGFIX */
- neon_load_reg64(cpu_V0, rn);
- tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
- neon_load_reg64(cpu_V1, rm);
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- }
- neon_store_reg64(cpu_V0, rd);
- if (q) {
- neon_store_reg64(cpu_V1, rd + 1);
- }
+ /* Extract: handled by decodetree */
+ return 1;
} else if ((insn & (1 << 11)) == 0) {
/* Two register misc. */
op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
@@ -6184,62 +5574,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
break;
}
- } else if ((insn & (1 << 10)) == 0) {
- /* VTBL, VTBX. */
- int n = ((insn >> 8) & 3) + 1;
- if ((rn + n) > 32) {
- /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
- * helper function running off the end of the register file.
- */
- return 1;
- }
- n <<= 3;
- if (insn & (1 << 6)) {
- tmp = neon_load_reg(rd, 0);
- } else {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- }
- tmp2 = neon_load_reg(rm, 0);
- ptr1 = vfp_reg_ptr(true, rn);
- tmp5 = tcg_const_i32(n);
- gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
- tcg_temp_free_i32(tmp);
- if (insn & (1 << 6)) {
- tmp = neon_load_reg(rd, 1);
- } else {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- }
- tmp3 = neon_load_reg(rm, 1);
- gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
- tcg_temp_free_i32(tmp5);
- tcg_temp_free_ptr(ptr1);
- neon_store_reg(rd, 0, tmp2);
- neon_store_reg(rd, 1, tmp3);
- tcg_temp_free_i32(tmp);
- } else if ((insn & 0x380) == 0) {
- /* VDUP */
- int element;
- MemOp size;
-
- if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
- return 1;
- }
- if (insn & (1 << 16)) {
- size = MO_8;
- element = (insn >> 17) & 7;
- } else if (insn & (1 << 17)) {
- size = MO_16;
- element = (insn >> 18) & 3;
- } else {
- size = MO_32;
- element = (insn >> 19) & 1;
- }
- tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
- neon_element_offset(rm, element, size),
- q ? 16 : 8, q ? 16 : 8);
} else {
+ /* VTBL, VTBX, VDUP: handled by decodetree */
return 1;
}
}
diff --git a/target/arm/translate.h b/target/arm/translate.h
index c937dfe9bf..62ed5c4780 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -371,6 +371,7 @@ typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
+typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);