diff options
author | Stefan Brankovic | 2019-07-15 16:22:50 +0200 |
---|---|---|
committer | David Gibson | 2019-08-21 09:17:11 +0200 |
commit | 083b3f012fc27536afc74d005d706b20eae200f8 (patch) | |
tree | 4b559d3741f803e0a6d2a344f306d26a9dd55986 /target/ppc/int_helper.c | |
parent | target/ppc: move opcode decode tables to PowerPCCPU (diff) | |
download | qemu-083b3f012fc27536afc74d005d706b20eae200f8.tar.gz qemu-083b3f012fc27536afc74d005d706b20eae200f8.tar.xz qemu-083b3f012fc27536afc74d005d706b20eae200f8.zip |
target/ppc: Optimize emulation of vgbbd instruction
Optimize altivec instruction vgbbd (Vector Gather Bits by Bytes by Doubleword)
All ith bits (i in range 1 to 8) of each byte of doubleword element in
source register are concatenated and placed into ith byte of appropriate
doubleword element in destination register.
Following solution is done for both doubleword elements of source register
in parallel, in order to reduce the number of instructions needed(that's why
arrays are used):
First, both doubleword elements of source register vB are placed in
appropriate element of array avr. Bits are gathered in 2x8 iterations(2 for
loops). In first iteration bit 1 of byte 1, bit 2 of byte 2,... bit 8 of
byte 8 are in their final spots so avr[i], i={0,1} can be and-ed with
tcg_mask. For every following iteration, both avr[i] and tcg_mask variables
have to be shifted right for 7 and 8 places, respectively, in order to get
bit 1 of byte 2, bit 2 of byte 3.. bit 7 of byte 8 in their final spots so
shifted avr values(saved in tmp) can be and-ed with new value of tcg_mask...
After first 8 iteration(first loop), all the first bits are in their final
places, all second bits but second bit from eight byte are in their places...
only 1 eight bit from eight byte is in it's place). In second loop we do all
operations symmetrically, in order to get other half of bits in their final
spots. Results for first and second doubleword elements are saved in
result[0] and result[1] respectively. In the end those results are saved in
appropriate doubleword element of destination register vD.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-5-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Diffstat (limited to 'target/ppc/int_helper.c')
-rw-r--r-- | target/ppc/int_helper.c | 276 |
1 files changed, 0 insertions, 276 deletions
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index d2cad787ad..a265cb07c5 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1187,282 +1187,6 @@ void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) #undef VBPERMQ_INDEX #undef VBPERMQ_DW -static const uint64_t VGBBD_MASKS[256] = { - 0x0000000000000000ull, /* 00 */ - 0x0000000000000080ull, /* 01 */ - 0x0000000000008000ull, /* 02 */ - 0x0000000000008080ull, /* 03 */ - 0x0000000000800000ull, /* 04 */ - 0x0000000000800080ull, /* 05 */ - 0x0000000000808000ull, /* 06 */ - 0x0000000000808080ull, /* 07 */ - 0x0000000080000000ull, /* 08 */ - 0x0000000080000080ull, /* 09 */ - 0x0000000080008000ull, /* 0A */ - 0x0000000080008080ull, /* 0B */ - 0x0000000080800000ull, /* 0C */ - 0x0000000080800080ull, /* 0D */ - 0x0000000080808000ull, /* 0E */ - 0x0000000080808080ull, /* 0F */ - 0x0000008000000000ull, /* 10 */ - 0x0000008000000080ull, /* 11 */ - 0x0000008000008000ull, /* 12 */ - 0x0000008000008080ull, /* 13 */ - 0x0000008000800000ull, /* 14 */ - 0x0000008000800080ull, /* 15 */ - 0x0000008000808000ull, /* 16 */ - 0x0000008000808080ull, /* 17 */ - 0x0000008080000000ull, /* 18 */ - 0x0000008080000080ull, /* 19 */ - 0x0000008080008000ull, /* 1A */ - 0x0000008080008080ull, /* 1B */ - 0x0000008080800000ull, /* 1C */ - 0x0000008080800080ull, /* 1D */ - 0x0000008080808000ull, /* 1E */ - 0x0000008080808080ull, /* 1F */ - 0x0000800000000000ull, /* 20 */ - 0x0000800000000080ull, /* 21 */ - 0x0000800000008000ull, /* 22 */ - 0x0000800000008080ull, /* 23 */ - 0x0000800000800000ull, /* 24 */ - 0x0000800000800080ull, /* 25 */ - 0x0000800000808000ull, /* 26 */ - 0x0000800000808080ull, /* 27 */ - 0x0000800080000000ull, /* 28 */ - 0x0000800080000080ull, /* 29 */ - 0x0000800080008000ull, /* 2A */ - 0x0000800080008080ull, /* 2B */ - 0x0000800080800000ull, /* 2C */ - 0x0000800080800080ull, /* 2D */ - 0x0000800080808000ull, /* 2E */ - 0x0000800080808080ull, /* 2F */ - 0x0000808000000000ull, /* 30 */ - 0x0000808000000080ull, /* 31 */ - 0x0000808000008000ull, /* 32 */ - 0x0000808000008080ull, /* 33 */ - 0x0000808000800000ull, /* 34 */ - 0x0000808000800080ull, /* 35 */ - 0x0000808000808000ull, /* 36 */ - 0x0000808000808080ull, /* 37 */ - 0x0000808080000000ull, /* 38 */ - 0x0000808080000080ull, /* 39 */ - 0x0000808080008000ull, /* 3A */ - 0x0000808080008080ull, /* 3B */ - 0x0000808080800000ull, /* 3C */ - 0x0000808080800080ull, /* 3D */ - 0x0000808080808000ull, /* 3E */ - 0x0000808080808080ull, /* 3F */ - 0x0080000000000000ull, /* 40 */ - 0x0080000000000080ull, /* 41 */ - 0x0080000000008000ull, /* 42 */ - 0x0080000000008080ull, /* 43 */ - 0x0080000000800000ull, /* 44 */ - 0x0080000000800080ull, /* 45 */ - 0x0080000000808000ull, /* 46 */ - 0x0080000000808080ull, /* 47 */ - 0x0080000080000000ull, /* 48 */ - 0x0080000080000080ull, /* 49 */ - 0x0080000080008000ull, /* 4A */ - 0x0080000080008080ull, /* 4B */ - 0x0080000080800000ull, /* 4C */ - 0x0080000080800080ull, /* 4D */ - 0x0080000080808000ull, /* 4E */ - 0x0080000080808080ull, /* 4F */ - 0x0080008000000000ull, /* 50 */ - 0x0080008000000080ull, /* 51 */ - 0x0080008000008000ull, /* 52 */ - 0x0080008000008080ull, /* 53 */ - 0x0080008000800000ull, /* 54 */ - 0x0080008000800080ull, /* 55 */ - 0x0080008000808000ull, /* 56 */ - 0x0080008000808080ull, /* 57 */ - 0x0080008080000000ull, /* 58 */ - 0x0080008080000080ull, /* 59 */ - 0x0080008080008000ull, /* 5A */ - 0x0080008080008080ull, /* 5B */ - 0x0080008080800000ull, /* 5C */ - 0x0080008080800080ull, /* 5D */ - 0x0080008080808000ull, /* 5E */ - 0x0080008080808080ull, /* 5F */ - 0x0080800000000000ull, /* 60 */ - 0x0080800000000080ull, /* 61 */ - 0x0080800000008000ull, /* 62 */ - 0x0080800000008080ull, /* 63 */ - 0x0080800000800000ull, /* 64 */ - 0x0080800000800080ull, /* 65 */ - 0x0080800000808000ull, /* 66 */ - 0x0080800000808080ull, /* 67 */ - 0x0080800080000000ull, /* 68 */ - 0x0080800080000080ull, /* 69 */ - 0x0080800080008000ull, /* 6A */ - 0x0080800080008080ull, /* 6B */ - 0x0080800080800000ull, /* 6C */ - 0x0080800080800080ull, /* 6D */ - 0x0080800080808000ull, /* 6E */ - 0x0080800080808080ull, /* 6F */ - 0x0080808000000000ull, /* 70 */ - 0x0080808000000080ull, /* 71 */ - 0x0080808000008000ull, /* 72 */ - 0x0080808000008080ull, /* 73 */ - 0x0080808000800000ull, /* 74 */ - 0x0080808000800080ull, /* 75 */ - 0x0080808000808000ull, /* 76 */ - 0x0080808000808080ull, /* 77 */ - 0x0080808080000000ull, /* 78 */ - 0x0080808080000080ull, /* 79 */ - 0x0080808080008000ull, /* 7A */ - 0x0080808080008080ull, /* 7B */ - 0x0080808080800000ull, /* 7C */ - 0x0080808080800080ull, /* 7D */ - 0x0080808080808000ull, /* 7E */ - 0x0080808080808080ull, /* 7F */ - 0x8000000000000000ull, /* 80 */ - 0x8000000000000080ull, /* 81 */ - 0x8000000000008000ull, /* 82 */ - 0x8000000000008080ull, /* 83 */ - 0x8000000000800000ull, /* 84 */ - 0x8000000000800080ull, /* 85 */ - 0x8000000000808000ull, /* 86 */ - 0x8000000000808080ull, /* 87 */ - 0x8000000080000000ull, /* 88 */ - 0x8000000080000080ull, /* 89 */ - 0x8000000080008000ull, /* 8A */ - 0x8000000080008080ull, /* 8B */ - 0x8000000080800000ull, /* 8C */ - 0x8000000080800080ull, /* 8D */ - 0x8000000080808000ull, /* 8E */ - 0x8000000080808080ull, /* 8F */ - 0x8000008000000000ull, /* 90 */ - 0x8000008000000080ull, /* 91 */ - 0x8000008000008000ull, /* 92 */ - 0x8000008000008080ull, /* 93 */ - 0x8000008000800000ull, /* 94 */ - 0x8000008000800080ull, /* 95 */ - 0x8000008000808000ull, /* 96 */ - 0x8000008000808080ull, /* 97 */ - 0x8000008080000000ull, /* 98 */ - 0x8000008080000080ull, /* 99 */ - 0x8000008080008000ull, /* 9A */ - 0x8000008080008080ull, /* 9B */ - 0x8000008080800000ull, /* 9C */ - 0x8000008080800080ull, /* 9D */ - 0x8000008080808000ull, /* 9E */ - 0x8000008080808080ull, /* 9F */ - 0x8000800000000000ull, /* A0 */ - 0x8000800000000080ull, /* A1 */ - 0x8000800000008000ull, /* A2 */ - 0x8000800000008080ull, /* A3 */ - 0x8000800000800000ull, /* A4 */ - 0x8000800000800080ull, /* A5 */ - 0x8000800000808000ull, /* A6 */ - 0x8000800000808080ull, /* A7 */ - 0x8000800080000000ull, /* A8 */ - 0x8000800080000080ull, /* A9 */ - 0x8000800080008000ull, /* AA */ - 0x8000800080008080ull, /* AB */ - 0x8000800080800000ull, /* AC */ - 0x8000800080800080ull, /* AD */ - 0x8000800080808000ull, /* AE */ - 0x8000800080808080ull, /* AF */ - 0x8000808000000000ull, /* B0 */ - 0x8000808000000080ull, /* B1 */ - 0x8000808000008000ull, /* B2 */ - 0x8000808000008080ull, /* B3 */ - 0x8000808000800000ull, /* B4 */ - 0x8000808000800080ull, /* B5 */ - 0x8000808000808000ull, /* B6 */ - 0x8000808000808080ull, /* B7 */ - 0x8000808080000000ull, /* B8 */ - 0x8000808080000080ull, /* B9 */ - 0x8000808080008000ull, /* BA */ - 0x8000808080008080ull, /* BB */ - 0x8000808080800000ull, /* BC */ - 0x8000808080800080ull, /* BD */ - 0x8000808080808000ull, /* BE */ - 0x8000808080808080ull, /* BF */ - 0x8080000000000000ull, /* C0 */ - 0x8080000000000080ull, /* C1 */ - 0x8080000000008000ull, /* C2 */ - 0x8080000000008080ull, /* C3 */ - 0x8080000000800000ull, /* C4 */ - 0x8080000000800080ull, /* C5 */ - 0x8080000000808000ull, /* C6 */ - 0x8080000000808080ull, /* C7 */ - 0x8080000080000000ull, /* C8 */ - 0x8080000080000080ull, /* C9 */ - 0x8080000080008000ull, /* CA */ - 0x8080000080008080ull, /* CB */ - 0x8080000080800000ull, /* CC */ - 0x8080000080800080ull, /* CD */ - 0x8080000080808000ull, /* CE */ - 0x8080000080808080ull, /* CF */ - 0x8080008000000000ull, /* D0 */ - 0x8080008000000080ull, /* D1 */ - 0x8080008000008000ull, /* D2 */ - 0x8080008000008080ull, /* D3 */ - 0x8080008000800000ull, /* D4 */ - 0x8080008000800080ull, /* D5 */ - 0x8080008000808000ull, /* D6 */ - 0x8080008000808080ull, /* D7 */ - 0x8080008080000000ull, /* D8 */ - 0x8080008080000080ull, /* D9 */ - 0x8080008080008000ull, /* DA */ - 0x8080008080008080ull, /* DB */ - 0x8080008080800000ull, /* DC */ - 0x8080008080800080ull, /* DD */ - 0x8080008080808000ull, /* DE */ - 0x8080008080808080ull, /* DF */ - 0x8080800000000000ull, /* E0 */ - 0x8080800000000080ull, /* E1 */ - 0x8080800000008000ull, /* E2 */ - 0x8080800000008080ull, /* E3 */ - 0x8080800000800000ull, /* E4 */ - 0x8080800000800080ull, /* E5 */ - 0x8080800000808000ull, /* E6 */ - 0x8080800000808080ull, /* E7 */ - 0x8080800080000000ull, /* E8 */ - 0x8080800080000080ull, /* E9 */ - 0x8080800080008000ull, /* EA */ - 0x8080800080008080ull, /* EB */ - 0x8080800080800000ull, /* EC */ - 0x8080800080800080ull, /* ED */ - 0x8080800080808000ull, /* EE */ - 0x8080800080808080ull, /* EF */ - 0x8080808000000000ull, /* F0 */ - 0x8080808000000080ull, /* F1 */ - 0x8080808000008000ull, /* F2 */ - 0x8080808000008080ull, /* F3 */ - 0x8080808000800000ull, /* F4 */ - 0x8080808000800080ull, /* F5 */ - 0x8080808000808000ull, /* F6 */ - 0x8080808000808080ull, /* F7 */ - 0x8080808080000000ull, /* F8 */ - 0x8080808080000080ull, /* F9 */ - 0x8080808080008000ull, /* FA */ - 0x8080808080008080ull, /* FB */ - 0x8080808080800000ull, /* FC */ - 0x8080808080800080ull, /* FD */ - 0x8080808080808000ull, /* FE */ - 0x8080808080808080ull, /* FF */ -}; - -void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b) -{ - int i; - uint64_t t[2] = { 0, 0 }; - - VECTOR_FOR_INORDER_I(i, u8) { -#if defined(HOST_WORDS_BIGENDIAN) - t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7); -#else - t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7)); -#endif - } - - r->u64[0] = t[0]; - r->u64[1] = t[1]; -} - #define PMSUM(name, srcfld, trgfld, trgtyp) \ void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ |