/*
 * Xen 9p backend
 *
 * Copyright Aporeto 2017
 *
 * Authors:
 *  Stefano Stabellini <stefano@aporeto.com>
 *
 */

#include "qemu/osdep.h"

#include "hw/9pfs/9p.h"
#include "hw/xen/xen-legacy-backend.h"
#include "hw/9pfs/xen-9pfs.h"
#include "qapi/error.h"
#include "qemu/config-file.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "fsdev/qemu-fsdev.h"

#define VERSIONS "1"
#define MAX_RINGS 8
#define MAX_RING_ORDER 9

typedef struct Xen9pfsRing {
    struct Xen9pfsDev *priv;

    int ref;
    xenevtchn_handle   *evtchndev;
    int evtchn;
    int local_port;
    int ring_order;
    struct xen_9pfs_data_intf *intf;
    unsigned char *data;
    struct xen_9pfs_data ring;

    struct iovec *sg;
    QEMUBH *bh;
    Coroutine *co;

    /* local copies, so that we can read/write PDU data directly from
     * the ring */
    RING_IDX out_cons, out_size, in_cons;
    bool inprogress;
} Xen9pfsRing;

typedef struct Xen9pfsDev {
    struct XenLegacyDevice xendev;  /* must be first */
    V9fsState state;
    char *path;
    char *security_model;
    char *tag;
    char *id;

    int num_rings;
    Xen9pfsRing *rings;
} Xen9pfsDev;

static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev);

static void xen_9pfs_in_sg(Xen9pfsRing *ring,
                           struct iovec *in_sg,
                           int *num,
                           uint32_t idx,
                           uint32_t size)
{
    RING_IDX cons, prod, masked_prod, masked_cons;

    cons = ring->intf->in_cons;
    prod = ring->intf->in_prod;
    xen_rmb();
    masked_prod = xen_9pfs_mask(prod, XEN_FLEX_RING_SIZE(ring->ring_order));
    masked_cons = xen_9pfs_mask(cons, XEN_FLEX_RING_SIZE(ring->ring_order));

    if (masked_prod < masked_cons) {
        in_sg[0].iov_base = ring->ring.in + masked_prod;
        in_sg[0].iov_len = masked_cons - masked_prod;
        *num = 1;
    } else {
        in_sg[0].iov_base = ring->ring.in + masked_prod;
        in_sg[0].iov_len = XEN_FLEX_RING_SIZE(ring->ring_order) - masked_prod;
        in_sg[1].iov_base = ring->ring.in;
        in_sg[1].iov_len = masked_cons;
        *num = 2;
    }
}

static void xen_9pfs_out_sg(Xen9pfsRing *ring,
                            struct iovec *out_sg,
                            int *num,
                            uint32_t idx)
{
    RING_IDX cons, prod, masked_prod, masked_cons;

    cons = ring->intf->out_cons;
    prod = ring->intf->out_prod;
    xen_rmb();
    masked_prod = xen_9pfs_mask(prod, XEN_FLEX_RING_SIZE(ring->ring_order));
    masked_cons = xen_9pfs_mask(cons, XEN_FLEX_RING_SIZE(ring->ring_order));

    if (masked_cons < masked_prod) {
        out_sg[0].iov_base = ring->ring.out + masked_cons;
        out_sg[0].iov_len = ring->out_size;
        *num = 1;
    } else {
        if (ring->out_size >
            (XEN_FLEX_RING_SIZE(ring->ring_order) - masked_cons)) {
            out_sg[0].iov_base = ring->ring.out + masked_cons;
            out_sg[0].iov_len = XEN_FLEX_RING_SIZE(ring->ring_order) -
                                masked_cons;
            out_sg[1].iov_base = ring->ring.out;
            out_sg[1].iov_len = ring->out_size -
                                (XEN_FLEX_RING_SIZE(ring->ring_order) -
                                 masked_cons);
            *num = 2;
        } else {
            out_sg[0].iov_base = ring->ring.out + masked_cons;
            out_sg[0].iov_len = ring->out_size;
            *num = 1;
        }
    }
}

static ssize_t xen_9pfs_pdu_vmarshal(V9fsPDU *pdu,
                                     size_t offset,
                                     const char *fmt,
                                     va_list ap)
{
    Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
    struct iovec in_sg[2];
    int num;
    ssize_t ret;

    xen_9pfs_in_sg(&xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings],
                   in_sg, &num, pdu->idx, ROUND_UP(offset + 128, 512));

    ret = v9fs_iov_vmarshal(in_sg, num, offset, 0, fmt, ap);
    if (ret < 0) {
        xen_pv_printf(&xen_9pfs->xendev, 0,
                      "Failed to encode VirtFS reply type %d\n",
                      pdu->id + 1);
        xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing);
        xen_9pfs_disconnect(&xen_9pfs->xendev);
    }
    return ret;
}

static ssize_t xen_9pfs_pdu_vunmarshal(V9fsPDU *pdu,
                                       size_t offset,
                                       const char *fmt,
                                       va_list ap)
{
    Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
    struct iovec out_sg[2];
    int num;
    ssize_t ret;

    xen_9pfs_out_sg(&xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings],
                    out_sg, &num, pdu->idx);

    ret = v9fs_iov_vunmarshal(out_sg, num, offset, 0, fmt, ap);
    if (ret < 0) {
        xen_pv_printf(&xen_9pfs->xendev, 0,
                      "Failed to decode VirtFS request type %d\n", pdu->id);
        xen_be_set_state(&xen_9pfs->xendev, XenbusStateClosing);
        xen_9pfs_disconnect(&xen_9pfs->xendev);
    }
    return ret;
}

static void xen_9pfs_init_out_iov_from_pdu(V9fsPDU *pdu,
                                           struct iovec **piov,
                                           unsigned int *pniov,
                                           size_t size)
{
    Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
    Xen9pfsRing *ring = &xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings];
    int num;

    g_free(ring->sg);

    ring->sg = g_new0(struct iovec, 2);
    xen_9pfs_out_sg(ring, ring->sg, &num, pdu->idx);
    *piov = ring->sg;
    *pniov = num;
}

static void xen_9pfs_init_in_iov_from_pdu(V9fsPDU *pdu,
                                          struct iovec **piov,
                                          unsigned int *pniov,
                                          size_t size)
{
    Xen9pfsDev *xen_9pfs = container_of(pdu->s, Xen9pfsDev, state);
    Xen9pfsRing *ring = &xen_9pfs->rings[pdu->tag % xen_9pfs->num_rings];
    int num;
    size_t buf_size;

    g_free(ring->sg);

    ring->sg = g_new0(struct iovec, 2);
    ring->co = qemu_coroutine_self();
    /* make sure other threads see ring->co changes before continuing */
    smp_wmb();

again:
    xen_9pfs_in_sg(ring, ring->sg, &num, pdu->idx, size);
    buf_size = iov_size(ring->sg, num);
    if (buf_size  < size) {
        qemu_coroutine_yield();
        goto again;
    }
    ring->co = NULL;
    /* make sure other threads see ring->co changes before continuing */
    smp_wmb();

    *piov = ring->sg;
    *pniov = num;
}

static void xen_9pfs_push_and_notify(V9fsPDU *pdu)
{
    RING_IDX prod;
    Xen9pfsDev *priv = container_of(pdu->s, Xen9pfsDev, state);
    Xen9pfsRing *ring = &priv->rings[pdu->tag % priv->num_rings];

    g_free(ring->sg);
    ring->sg = NULL;

    ring->intf->out_cons = ring->out_cons;
    xen_wmb();

    prod = ring->intf->in_prod;
    xen_rmb();
    ring->intf->in_prod = prod + pdu->size;
    xen_wmb();

    ring->inprogress = false;
    xenevtchn_notify(ring->evtchndev, ring->local_port);

    qemu_bh_schedule(ring->bh);
}

static const V9fsTransport xen_9p_transport = {
    .pdu_vmarshal = xen_9pfs_pdu_vmarshal,
    .pdu_vunmarshal = xen_9pfs_pdu_vunmarshal,
    .init_in_iov_from_pdu = xen_9pfs_init_in_iov_from_pdu,
    .init_out_iov_from_pdu = xen_9pfs_init_out_iov_from_pdu,
    .push_and_notify = xen_9pfs_push_and_notify,
};

static int xen_9pfs_init(struct XenLegacyDevice *xendev)
{
    return 0;
}

static int xen_9pfs_receive(Xen9pfsRing *ring)
{
    P9MsgHeader h;
    RING_IDX cons, prod, masked_prod, masked_cons, queued;
    V9fsPDU *pdu;

    if (ring->inprogress) {
        return 0;
    }

    cons = ring->intf->out_cons;
    prod = ring->intf->out_prod;
    xen_rmb();

    queued = xen_9pfs_queued(prod, cons, XEN_FLEX_RING_SIZE(ring->ring_order));
    if (queued < sizeof(h)) {
        return 0;
    }
    ring->inprogress = true;

    masked_prod = xen_9pfs_mask(prod, XEN_FLEX_RING_SIZE(ring->ring_order));
    masked_cons = xen_9pfs_mask(cons, XEN_FLEX_RING_SIZE(ring->ring_order));

    xen_9pfs_read_packet((uint8_t *) &h, ring->ring.out, sizeof(h),
                         masked_prod, &masked_cons,
                         XEN_FLEX_RING_SIZE(ring->ring_order));
    if (queued < le32_to_cpu(h.size_le)) {
        return 0;
    }

    /* cannot fail, because we only handle one request per ring at a time */
    pdu = pdu_alloc(&ring->priv->state);
    ring->out_size = le32_to_cpu(h.size_le);
    ring->out_cons = cons + le32_to_cpu(h.size_le);

    pdu_submit(pdu, &h);

    return 0;
}

static void xen_9pfs_bh(void *opaque)
{
    Xen9pfsRing *ring = opaque;
    bool wait;

again:
    wait = ring->co != NULL && qemu_coroutine_entered(ring->co);
    /* paired with the smb_wmb barriers in xen_9pfs_init_in_iov_from_pdu */
    smp_rmb();
    if (wait) {
        cpu_relax();
        goto again;
    }

    if (ring->co != NULL) {
        qemu_coroutine_enter_if_inactive(ring->co);
    }
    xen_9pfs_receive(ring);
}

static void xen_9pfs_evtchn_event(void *opaque)
{
    Xen9pfsRing *ring = opaque;
    evtchn_port_t port;

    port = xenevtchn_pending(ring->evtchndev);
    xenevtchn_unmask(ring->evtchndev, port);

    qemu_bh_schedule(ring->bh);
}

static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev)
{
    Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
    int i;

    for (i = 0; i < xen_9pdev->num_rings; i++) {
        if (xen_9pdev->rings[i].evtchndev != NULL) {
            qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
                    NULL, NULL, NULL);
            xenevtchn_unbind(xen_9pdev->rings[i].evtchndev,
                             xen_9pdev->rings[i].local_port);
            xen_9pdev->rings[i].evtchndev = NULL;
        }
    }
}

static int xen_9pfs_free(struct XenLegacyDevice *xendev)
{
    Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
    int i;

    if (xen_9pdev->rings[0].evtchndev != NULL) {
        xen_9pfs_disconnect(xendev);
    }

    for (i = 0; i < xen_9pdev->num_rings; i++) {
        if (xen_9pdev->rings[i].data != NULL) {
            xen_be_unmap_grant_refs(&xen_9pdev->xendev,
                                    xen_9pdev->rings[i].data,
                                    (1 << xen_9pdev->rings[i].ring_order));
        }
        if (xen_9pdev->rings[i].intf != NULL) {
            xen_be_unmap_grant_refs(&xen_9pdev->xendev,
                                    xen_9pdev->rings[i].intf,
                                    1);
        }
        if (xen_9pdev->rings[i].bh != NULL) {
            qemu_bh_delete(xen_9pdev->rings[i].bh);
        }
    }

    g_free(xen_9pdev->id);
    g_free(xen_9pdev->tag);
    g_free(xen_9pdev->path);
    g_free(xen_9pdev->security_model);
    g_free(xen_9pdev->rings);
    return 0;
}

static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
{
    Error *err = NULL;
    int i;
    Xen9pfsDev *xen_9pdev = container_of(xendev, Xen9pfsDev, xendev);
    V9fsState *s = &xen_9pdev->state;
    QemuOpts *fsdev;

    if (xenstore_read_fe_int(&xen_9pdev->xendev, "num-rings",
                             &xen_9pdev->num_rings) == -1 ||
        xen_9pdev->num_rings > MAX_RINGS || xen_9pdev->num_rings < 1) {
        return -1;
    }

    xen_9pdev->rings = g_new0(Xen9pfsRing, xen_9pdev->num_rings);
    for (i = 0; i < xen_9pdev->num_rings; i++) {
        char *str;
        int ring_order;

        xen_9pdev->rings[i].priv = xen_9pdev;
        xen_9pdev->rings[i].evtchn = -1;
        xen_9pdev->rings[i].local_port = -1;

        str = g_strdup_printf("ring-ref%u", i);
        if (xenstore_read_fe_int(&xen_9pdev->xendev, str,
                                 &xen_9pdev->rings[i].ref) == -1) {
            g_free(str);
            goto out;
        }
        g_free(str);
        str = g_strdup_printf("event-channel-%u", i);
        if (xenstore_read_fe_int(&xen_9pdev->xendev, str,
                                 &xen_9pdev->rings[i].evtchn) == -1) {
            g_free(str);
            goto out;
        }
        g_free(str);

        xen_9pdev->rings[i].intf =
            xen_be_map_grant_ref(&xen_9pdev->xendev,
                                 xen_9pdev->rings[i].ref,
                                 PROT_READ | PROT_WRITE);
        if (!xen_9pdev->rings[i].intf) {
            goto out;
        }
        ring_order = xen_9pdev->rings[i].intf->ring_order;
        if (ring_order > MAX_RING_ORDER) {
            goto out;
        }
        xen_9pdev->rings[i].ring_order = ring_order;
        xen_9pdev->rings[i].data =
            xen_be_map_grant_refs(&xen_9pdev->xendev,
                                  xen_9pdev->rings[i].intf->ref,
                                  (1 << ring_order),
                                  PROT_READ | PROT_WRITE);
        if (!xen_9pdev->rings[i].data) {
            goto out;
        }
        xen_9pdev->rings[i].ring.in = xen_9pdev->rings[i].data;
        xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data +
                                       XEN_FLEX_RING_SIZE(ring_order);

        xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]);
        xen_9pdev->rings[i].out_cons = 0;
        xen_9pdev->rings[i].out_size = 0;
        xen_9pdev->rings[i].inprogress = false;


        xen_9pdev->rings[i].evtchndev = xenevtchn_open(NULL, 0);
        if (xen_9pdev->rings[i].evtchndev == NULL) {
            goto out;
        }
        qemu_set_cloexec(xenevtchn_fd(xen_9pdev->rings[i].evtchndev));
        xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain
                                            (xen_9pdev->rings[i].evtchndev,
                                             xendev->dom,
                                             xen_9pdev->rings[i].evtchn);
        if (xen_9pdev->rings[i].local_port == -1) {
            xen_pv_printf(xendev, 0,
                          "xenevtchn_bind_interdomain failed port=%d\n",
                          xen_9pdev->rings[i].evtchn);
            goto out;
        }
        xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
        qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
                xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
    }

    xen_9pdev->security_model = xenstore_read_be_str(xendev, "security_model");
    xen_9pdev->path = xenstore_read_be_str(xendev, "path");
    xen_9pdev->id = s->fsconf.fsdev_id =
        g_strdup_printf("xen9p%d", xendev->dev);
    xen_9pdev->tag = s->fsconf.tag = xenstore_read_fe_str(xendev, "tag");
    fsdev = qemu_opts_create(qemu_find_opts("fsdev"),
            s->fsconf.tag,
            1, NULL);
    qemu_opt_set(fsdev, "fsdriver", "local", NULL);
    qemu_opt_set(fsdev, "path", xen_9pdev->path, NULL);
    qemu_opt_set(fsdev, "security_model", xen_9pdev->security_model, NULL);
    qemu_opts_set_id(fsdev, s->fsconf.fsdev_id);
    qemu_fsdev_add(fsdev, &err);
    if (err) {
        error_report_err(err);
    }
    v9fs_device_realize_common(s, &xen_9p_transport, NULL);

    return 0;

out:
    xen_9pfs_free(xendev);
    return -1;
}

static void xen_9pfs_alloc(struct XenLegacyDevice *xendev)
{
    xenstore_write_be_str(xendev, "versions", VERSIONS);
    xenstore_write_be_int(xendev, "max-rings", MAX_RINGS);
    xenstore_write_be_int(xendev, "max-ring-page-order", MAX_RING_ORDER);
}

struct XenDevOps xen_9pfs_ops = {
    .size       = sizeof(Xen9pfsDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
    .alloc      = xen_9pfs_alloc,
    .init       = xen_9pfs_init,
    .initialise = xen_9pfs_connect,
    .disconnect = xen_9pfs_disconnect,
    .free       = xen_9pfs_free,
};