summaryrefslogblamecommitdiffstats
path: root/target/s390x/vec_string_helper.c
blob: 0ee3470112b517e861218edf4201f9f61eab7c6a (plain) (tree)





























                                                                            








                                                                 



























































































































                                                                                
























































                                                                                
































































                                                                                
/*
 * QEMU TCG support -- s390x vector string instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "internal.h"
#include "vec.h"
#include "tcg/tcg.h"
#include "tcg/tcg-gvec-desc.h"
#include "exec/helper-proto.h"

/*
 * Returns a bit set in the MSB of each element that is zero,
 * as defined by the mask.
 */
static inline uint64_t zero_search(uint64_t a, uint64_t mask)
{
    return ~(((a & mask) + mask) | a | mask);
}

/*
 * Returns a bit set in the MSB of each element that is not zero,
 * as defined by the mask.
 */
static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
{
    return (((a & mask) + mask) | a) & ~mask;
}

/*
 * Returns the byte offset for the first match, or 16 for no match.
 */
static inline int match_index(uint64_t c0, uint64_t c1)
{
    return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3;
}

/*
 * Returns the number of bits composing one element.
 */
static uint8_t get_element_bits(uint8_t es)
{
    return (1 << es) * BITS_PER_BYTE;
}

/*
 * Returns the bitmask for a single element.
 */
static uint64_t get_single_element_mask(uint8_t es)
{
    return -1ull >> (64 - get_element_bits(es));
}

/*
 * Returns the bitmask for a single element (excluding the MSB).
 */
static uint64_t get_single_element_lsbs_mask(uint8_t es)
{
    return -1ull >> (65 - get_element_bits(es));
}

/*
 * Returns the bitmasks for multiple elements (excluding the MSBs).
 */
static uint64_t get_element_lsbs_mask(uint8_t es)
{
    return dup_const(es, get_single_element_lsbs_mask(es));
}

static int vfae(void *v1, const void *v2, const void *v3, bool in,
                bool rt, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    const int bits = get_element_bits(es);
    uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_equal;
    int i;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    e0 = 0;
    e1 = 0;
    /* compare against equality with every other element */
    for (i = 0; i < 64; i += bits) {
        t0 = rol64(b0, i);
        t1 = rol64(b1, i);
        e0 |= zero_search(a0 ^ t0, mask);
        e0 |= zero_search(a0 ^ t1, mask);
        e1 |= zero_search(a1 ^ t0, mask);
        e1 |= zero_search(a1 ^ t1, mask);
    }
    /* invert the result if requested - invert only the MSBs */
    if (in) {
        e0 = ~e0 & ~mask;
        e1 = ~e1 & ~mask;
    }
    first_equal = match_index(e0, e1);

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    if (rt) {
        e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
        e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
        s390_vec_write_element64(v1, 0, e0);
        s390_vec_write_element64(v1, 1, e1);
    } else {
        s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
        s390_vec_write_element64(v1, 1, 0);
    }

    if (first_zero == 16 && first_equal == 16) {
        return 3; /* no match */
    } else if (first_zero == 16) {
        return 1; /* matching elements, no match for zero */
    } else if (first_equal < first_zero) {
        return 2; /* matching elements before match for zero */
    }
    return 0; /* match for zero */
}

#define DEF_VFAE_HELPER(BITS)                                                  \
void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3,         \
                             uint32_t desc)                                    \
{                                                                              \
    const bool in = extract32(simd_data(desc), 3, 1);                          \
    const bool rt = extract32(simd_data(desc), 2, 1);                          \
    const bool zs = extract32(simd_data(desc), 1, 1);                          \
                                                                               \
    vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                                   \
}
DEF_VFAE_HELPER(8)
DEF_VFAE_HELPER(16)
DEF_VFAE_HELPER(32)

#define DEF_VFAE_CC_HELPER(BITS)                                               \
void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3,      \
                                CPUS390XState *env, uint32_t desc)             \
{                                                                              \
    const bool in = extract32(simd_data(desc), 3, 1);                          \
    const bool rt = extract32(simd_data(desc), 2, 1);                          \
    const bool zs = extract32(simd_data(desc), 1, 1);                          \
                                                                               \
    env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                      \
}
DEF_VFAE_CC_HELPER(8)
DEF_VFAE_CC_HELPER(16)
DEF_VFAE_CC_HELPER(32)

static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_equal;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    e0 = zero_search(a0 ^ b0, mask);
    e1 = zero_search(a1 ^ b1, mask);
    first_equal = match_index(e0, e1);

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
    s390_vec_write_element64(v1, 1, 0);
    if (first_zero == 16 && first_equal == 16) {
        return 3; /* no match */
    } else if (first_zero == 16) {
        return 1; /* matching elements, no match for zero */
    } else if (first_equal < first_zero) {
        return 2; /* matching elements before match for zero */
    }
    return 0; /* match for zero */
}

#define DEF_VFEE_HELPER(BITS)                                                  \
void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3,         \
                             uint32_t desc)                                    \
{                                                                              \
    const bool zs = extract32(simd_data(desc), 1, 1);                          \
                                                                               \
    vfee(v1, v2, v3, zs, MO_##BITS);                                           \
}
DEF_VFEE_HELPER(8)
DEF_VFEE_HELPER(16)
DEF_VFEE_HELPER(32)

#define DEF_VFEE_CC_HELPER(BITS)                                               \
void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3,      \
                                CPUS390XState *env, uint32_t desc)             \
{                                                                              \
    const bool zs = extract32(simd_data(desc), 1, 1);                          \
                                                                               \
    env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS);                              \
}
DEF_VFEE_CC_HELPER(8)
DEF_VFEE_CC_HELPER(16)
DEF_VFEE_CC_HELPER(32)

static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t mask = get_element_lsbs_mask(es);
    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    uint64_t first_zero = 16;
    uint64_t first_inequal;
    bool smaller = false;

    a0 = s390_vec_read_element64(v2, 0);
    a1 = s390_vec_read_element64(v2, 1);
    b0 = s390_vec_read_element64(v3, 0);
    b1 = s390_vec_read_element64(v3, 1);
    e0 = nonzero_search(a0 ^ b0, mask);
    e1 = nonzero_search(a1 ^ b1, mask);
    first_inequal = match_index(e0, e1);

    /* identify the smaller element */
    if (first_inequal < 16) {
        uint8_t enr = first_inequal / (1 << es);
        uint32_t a = s390_vec_read_element(v2, enr, es);
        uint32_t b = s390_vec_read_element(v3, enr, es);

        smaller = a < b;
    }

    if (zs) {
        z0 = zero_search(a0, mask);
        z1 = zero_search(a1, mask);
        first_zero = match_index(z0, z1);
    }

    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
    s390_vec_write_element64(v1, 1, 0);
    if (first_zero == 16 && first_inequal == 16) {
        return 3;
    } else if (first_zero < first_inequal) {
        return 0;
    }
    return smaller ? 1 : 2;
}

#define DEF_VFENE_HELPER(BITS)                                                 \
void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3,        \
                              uint32_t desc)                                   \
{                                                                              \
    const bool zs = extract32(simd_data(desc), 1, 1);                          \
                                                                               \
    vfene(v1, v2, v3, zs, MO_##BITS);                                          \
}
DEF_VFENE_HELPER(8)
DEF_VFENE_HELPER(16)
DEF_VFENE_HELPER(32)

#define DEF_VFENE_CC_HELPER(BITS)                                              \
void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3,     \
                                 CPUS390XState *env, uint32_t desc)            \
{                                                                              \
    const bool zs = extract32(simd_data(desc), 1, 1);                          \
                                                                               \
    env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS);                             \
}
DEF_VFENE_CC_HELPER(8)
DEF_VFENE_CC_HELPER(16)
DEF_VFENE_CC_HELPER(32)