summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- accel/tcg/tcg-runtime.h 11
-rw-r--r-- include/exec/helper-proto.h 4
-rw-r--r-- include/tcg/tcg-op.h 2
-rw-r--r-- tcg/riscv/tcg-target.c.inc 12
-rw-r--r-- tcg/tcg-op-gvec.c 32
5 files changed, 55 insertions, 6 deletions
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 4eda24e63a..2e36d6eb0c 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -28,6 +28,17 @@ DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env)
DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
+#ifndef IN_HELPER_PROTO
+/*
+ * Pass calls to memset directly to libc, without a thunk in qemu.
+ * The IN_HELPER_PROTO guard avoids re-declaring memset, since we fudge
+ * the type here: the size_t length is declared as ptr.  This assumes
+ * sizeof(void *) == sizeof(size_t), true for all supported hosts.
+ */
+#define helper_memset memset
+DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
+#endif /* IN_HELPER_PROTO */
+
#ifdef CONFIG_SOFTMMU
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h
index a0a8d9aa46..659f9298e8 100644
--- a/include/exec/helper-proto.h
+++ b/include/exec/helper-proto.h
@@ -35,11 +35,15 @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \
dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \
dh_ctype(t7));
+#define IN_HELPER_PROTO
+
#include "helper.h"
#include "trace/generated-helpers.h"
#include "tcg-runtime.h"
#include "plugin-helpers.h"
+#undef IN_HELPER_PROTO
+
#undef DEF_HELPER_FLAGS_0
#undef DEF_HELPER_FLAGS_1
#undef DEF_HELPER_FLAGS_2
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index 5abf17fecc..5b3bdacc39 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -1085,6 +1085,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
#define tcg_gen_bswap16_tl tcg_gen_bswap16_i64
#define tcg_gen_bswap32_tl tcg_gen_bswap32_i64
#define tcg_gen_bswap64_tl tcg_gen_bswap64_i64
+#define tcg_gen_bswap_tl tcg_gen_bswap64_i64
#define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64
#define tcg_gen_extr_i64_tl tcg_gen_extr32_i64
#define tcg_gen_andc_tl tcg_gen_andc_i64
@@ -1197,6 +1198,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
#define tcg_gen_ext32s_tl tcg_gen_mov_i32
#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32
+#define tcg_gen_bswap_tl tcg_gen_bswap32_i32
#define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
#define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32
#define tcg_gen_andc_tl tcg_gen_andc_i32
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index d536f3ccc1..4089e29cd9 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -1462,14 +1462,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_shl_i32:
if (c2) {
- tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2);
+ tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f);
} else {
tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2);
}
break;
case INDEX_op_shl_i64:
if (c2) {
- tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2);
+ tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2 & 0x3f);
} else {
tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2);
}
@@ -1477,14 +1477,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_shr_i32:
if (c2) {
- tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2);
+ tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f);
} else {
tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2);
}
break;
case INDEX_op_shr_i64:
if (c2) {
- tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2);
+ tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2 & 0x3f);
} else {
tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2);
}
@@ -1492,14 +1492,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_sar_i32:
if (c2) {
- tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2);
+ tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f);
} else {
tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2);
}
break;
case INDEX_op_sar_i64:
if (c2) {
- tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2);
+ tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2 & 0x3f);
} else {
tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2);
}
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index ddbe06b71a..1a41dfa908 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -547,6 +547,9 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
in_c = dup_const(vece, in_c);
if (in_c == 0) {
oprsz = maxsz;
+ vece = MO_8;
+ } else if (in_c == dup_const(MO_8, in_c)) {
+ vece = MO_8;
}
}
@@ -628,6 +631,35 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
/* Otherwise implement out of line. */
t_ptr = tcg_temp_new_ptr();
tcg_gen_addi_ptr(t_ptr, cpu_env, dofs);
+
+ /*
+ * This may be expand_clr for the tail of an operation, e.g.
+ * oprsz == 8 && maxsz == 64. The size of the clear is misaligned
+ * wrt simd_desc and will assert. Simply pass all replicated byte
+ * stores through to memset.
+ */
+ if (oprsz == maxsz && vece == MO_8) {
+ TCGv_ptr t_size = tcg_const_ptr(oprsz);
+ TCGv_i32 t_val;
+
+ if (in_32) {
+ t_val = in_32;
+ } else if (in_64) {
+ t_val = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t_val, in_64);
+ } else {
+ t_val = tcg_const_i32(in_c);
+ }
+ gen_helper_memset(t_ptr, t_ptr, t_val, t_size);
+
+ if (!in_32) {
+ tcg_temp_free_i32(t_val);
+ }
+ tcg_temp_free_ptr(t_size);
+ tcg_temp_free_ptr(t_ptr);
+ return;
+ }
+
t_desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0));
if (vece == MO_64) {