summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnton Ivanov2014-06-20 11:34:41 +0200
committerStefan Hajnoczi2014-06-27 10:39:10 +0200
commit3fb69aa1d127585fe9626c3d777a8ce2fc01a36d (patch)
tree446b2545b76989bcf608593d2b0c3ac59574d503
parentqemu-bridge-helper: Fix fd leak in main() (diff)
downloadqemu-3fb69aa1d127585fe9626c3d777a8ce2fc01a36d.tar.gz
qemu-3fb69aa1d127585fe9626c3d777a8ce2fc01a36d.tar.xz
qemu-3fb69aa1d127585fe9626c3d777a8ce2fc01a36d.zip
net: L2TPv3 transport
This transport allows to connect a QEMU nic to a static Ethernet over L2TPv3 tunnel. The transport supports all options present in the Linux kernel implementation. It allows QEMU to connect to any Linux host running kernel 3.3+, most routers and network devices as well as other QEMU instances. [Fixed up net_client_init1() switch statement to support -netdev --Stefan] Signed-off-by: Anton Ivanov <antivano@cisco.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r--net/Makefile.objs1
-rw-r--r--net/clients.h2
-rw-r--r--net/l2tpv3.c757
-rw-r--r--net/net.c6
-rw-r--r--qapi-schema.json60
-rw-r--r--qemu-options.hx82
6 files changed, 908 insertions, 0 deletions
diff --git a/net/Makefile.objs b/net/Makefile.objs
index 301f6b6b51..a06ba59dad 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,6 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
common-obj-y += socket.o
common-obj-y += dump.o
common-obj-y += eth.o
+common-obj-$(CONFIG_LINUX) += l2tpv3.o
common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o
common-obj-$(CONFIG_LINUX) += tap-linux.o
common-obj-$(CONFIG_WIN32) += tap-win32.o
diff --git a/net/clients.h b/net/clients.h
index 7f3d4ae9f3..2e8fedad8d 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -47,6 +47,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
int net_init_bridge(const NetClientOptions *opts, const char *name,
NetClientState *peer);
+int net_init_l2tpv3(const NetClientOptions *opts, const char *name,
+ NetClientState *peer);
#ifdef CONFIG_VDE
int net_init_vde(const NetClientOptions *opts, const char *name,
NetClientState *peer);
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
new file mode 100644
index 0000000000..528d95b641
--- /dev/null
+++ b/net/l2tpv3.c
@@ -0,0 +1,757 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2012-2014 Cisco Systems
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <linux/ip.h>
+#include <netdb.h>
+#include "config-host.h"
+#include "net/net.h"
+#include "clients.h"
+#include "monitor/monitor.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+
+/* The buffer size needs to be investigated for optimum numbers and
+ * optimum means of paging in on different systems. This size is
+ * chosen to be sufficient to accommodate one packet with some headers
+ */
+
+/* Alignment of each receive payload buffer: the host page size */
+#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
+/* Size in bytes of each receive payload buffer */
+#define BUFFER_SIZE 2048
+/* iovec entries per received message: [0] header, [1] payload */
+#define IOVSIZE 2
+/* Number of messages in the receive vector handed to recvmmsg() */
+#define MAX_L2TPV3_MSGCNT 64
+/* Capacity of the transmit iovec array (one entry is used for the header) */
+#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
+
+/* Header set to 0x30000 signifies a data packet.
+ * Only written when UDP encapsulation is in use.
+ */
+
+#define L2TPV3_DATA_PACKET 0x30000
+
+/* IANA-assigned IP protocol ID for L2TPv3 */
+
+#ifndef IPPROTO_L2TP
+#define IPPROTO_L2TP 0x73
+#endif
+
+/* Per-netdev state of one static L2TPv3 tunnel endpoint. */
+typedef struct NetL2TPV3State {
+ NetClientState nc;
+ int fd;
+
+ /*
+ * these are used for xmit - that happens packet a time
+ * and for first sign of life packet (easier to parse that once)
+ *
+ * header_buf holds the outgoing L2TPv3 header; vec is the iovec
+ * array passed to sendmsg() with the header in entry 0.
+ */
+
+ uint8_t *header_buf;
+ struct iovec *vec;
+
+ /*
+ * these are used for receive - try to "eat" up to
+ * MAX_L2TPV3_MSGCNT packets at a time
+ */
+
+ struct mmsghdr *msgvec;
+
+ /*
+ * peer address
+ */
+
+ struct sockaddr_storage *dgram_dst;
+ uint32_t dst_size;
+
+ /*
+ * L2TPv3 parameters
+ *
+ * header_size is the number of bytes received in front of the
+ * payload (it additionally covers the IP header for raw IPv4);
+ * counter is the last transmitted sequence number.
+ */
+
+ uint64_t rx_cookie;
+ uint64_t tx_cookie;
+ uint32_t rx_session;
+ uint32_t tx_session;
+ uint32_t header_size;
+ uint32_t counter;
+
+ /*
+ * DOS avoidance in error handling
+ * (set after the first bad header so the error is reported once)
+ */
+
+ bool header_mismatch;
+
+ /*
+ * Ring buffer handling
+ *
+ * queue_head: index in msgvec where the next batch is received
+ * queue_tail: index of the next message to deliver to the peer
+ * queue_depth: number of received, not yet delivered messages
+ */
+
+ int queue_head;
+ int queue_tail;
+ int queue_depth;
+
+ /*
+ * Precomputed offsets
+ *
+ * offset is the length of the header we transmit; the others are
+ * byte offsets of the respective fields within that header.
+ */
+
+ uint32_t offset;
+ uint32_t cookie_offset;
+ uint32_t counter_offset;
+ uint32_t session_offset;
+
+ /* Poll Control */
+
+ bool read_poll;
+ bool write_poll;
+
+ /* Flags */
+
+ bool ipv6;
+ bool udp;
+ bool has_counter;
+ bool pin_counter;
+ bool cookie;
+ bool cookie_is_64;
+
+} NetL2TPV3State;
+
+static int l2tpv3_can_send(void *opaque);
+static void net_l2tpv3_send(void *opaque);
+static void l2tpv3_writable(void *opaque);
+
+/*
+ * Re-register the tunnel socket so that the installed handlers
+ * reflect the current read_poll / write_poll flags.
+ */
+static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
+{
+    int (*can_read)(void *) = NULL;
+    void (*on_read)(void *) = NULL;
+    void (*on_write)(void *) = NULL;
+
+    if (s->read_poll) {
+        can_read = l2tpv3_can_send;
+        on_read = net_l2tpv3_send;
+    }
+    if (s->write_poll) {
+        on_write = l2tpv3_writable;
+    }
+
+    qemu_set_fd_handler2(s->fd, can_read, on_read, on_write, s);
+}
+
+/* Enable or disable read polling; a no-op when the state is unchanged. */
+static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
+{
+    if (s->read_poll == enable) {
+        return;
+    }
+
+    s->read_poll = enable;
+    l2tpv3_update_fd_handler(s);
+}
+
+/* Enable or disable write polling; a no-op when the state is unchanged. */
+static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
+{
+    if (s->write_poll == enable) {
+        return;
+    }
+
+    s->write_poll = enable;
+    l2tpv3_update_fd_handler(s);
+}
+
+/*
+ * Write handler: the socket accepts data again, so stop polling for
+ * writability and flush the packets queued for this client.
+ */
+static void l2tpv3_writable(void *opaque)
+{
+    NetL2TPV3State *state = opaque;
+    NetClientState *client = &state->nc;
+
+    l2tpv3_write_poll(state, false);
+    qemu_flush_queued_packets(client);
+}
+
+/* Read-poll predicate: returns what qemu_can_send_packet() reports. */
+static int l2tpv3_can_send(void *opaque)
+{
+    NetL2TPV3State *state = opaque;
+    NetClientState *client = &state->nc;
+
+    return qemu_can_send_packet(client);
+}
+
+/*
+ * Completion callback for qemu_send_packet_async(): turn read
+ * polling back on. The len argument is unused.
+ */
+static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
+{
+    NetL2TPV3State *state = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    l2tpv3_read_poll(state, true);
+}
+
+/* NetClientInfo .poll hook: switch both poll directions on or off. */
+static void l2tpv3_poll(NetClientState *nc, bool enable)
+{
+    NetL2TPV3State *state = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    /* write side first, then read side */
+    l2tpv3_write_poll(state, enable);
+    l2tpv3_read_poll(state, enable);
+}
+
+/*
+ * Fill s->header_buf with the header for the next outgoing packet:
+ * the data-packet marker (UDP only), the tx session id, the optional
+ * tx cookie and the optional sequence counter, all big-endian.
+ */
+static void l2tpv3_form_header(NetL2TPV3State *s)
+{
+    uint8_t *hdr = s->header_buf;
+
+    if (s->udp) {
+        stl_be_p(hdr, L2TPV3_DATA_PACKET);
+    }
+
+    stl_be_p(hdr + s->session_offset, s->tx_session);
+
+    if (s->cookie) {
+        uint8_t *cookie_field = hdr + s->cookie_offset;
+
+        if (s->cookie_is_64) {
+            stq_be_p(cookie_field, s->tx_cookie);
+        } else {
+            stl_be_p(cookie_field, s->tx_cookie);
+        }
+    }
+
+    if (!s->has_counter) {
+        return;
+    }
+
+    if (s->pin_counter) {
+        /* a pinned counter is always transmitted as zero */
+        stl_be_p(hdr + s->counter_offset, 0);
+    } else {
+        stl_be_p(hdr + s->counter_offset, ++s->counter);
+    }
+}
+
+/*
+ * NetClientInfo .receive_iov hook: transmit one guest packet, given as
+ * a scatter list, to the tunnel peer with the L2TPv3 header prepended.
+ *
+ * Returns the number of payload bytes sent (header excluded), 0 when
+ * the socket buffer is full (write polling is then enabled), -1 when
+ * the scatter list does not fit into s->vec, or -errno on other errors.
+ */
+static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
+                                            const struct iovec *iov,
+                                            int iovcnt)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    struct msghdr message;
+    ssize_t ret;
+
+    /* s->vec[0] is reserved for the header, hence the "- 1" */
+    if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
+        error_report(
+            "iovec too long %d > %d, change MAX_L2TPV3_IOVCNT in l2tpv3.c",
+            iovcnt, MAX_L2TPV3_IOVCNT - 1
+        );
+        return -1;
+    }
+    l2tpv3_form_header(s);
+    memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
+    s->vec->iov_base = s->header_buf;
+    s->vec->iov_len = s->offset;
+    message.msg_name = s->dgram_dst;
+    message.msg_namelen = s->dst_size;
+    message.msg_iov = s->vec;
+    message.msg_iovlen = iovcnt + 1;
+    message.msg_control = NULL;
+    message.msg_controllen = 0;
+    message.msg_flags = 0;
+    do {
+        ret = sendmsg(s->fd, &message, 0);
+    } while ((ret == -1) && (errno == EINTR));
+    if (ret > 0) {
+        /* report payload bytes only */
+        ret -= s->offset;
+    } else if (ret == 0) {
+        /* belt and braces - should not occur on DGRAM
+         * we should get an error and never a 0 send
+         */
+        ret = iov_size(iov, iovcnt);
+    } else {
+        ret = -errno;
+        if (ret == -EAGAIN || ret == -ENOBUFS) {
+            /* signal upper layer that socket buffer is full */
+            l2tpv3_write_poll(s, true);
+            ret = 0;
+        }
+    }
+    return ret;
+}
+
+/*
+ * NetClientInfo .receive hook: transmit one contiguous guest packet
+ * to the tunnel peer with the L2TPv3 header prepended.
+ *
+ * Returns the number of payload bytes sent (header excluded), 0 when
+ * the socket buffer is full (write polling is then enabled), or
+ * -errno on other errors.
+ */
+static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
+                                        const uint8_t *buf,
+                                        size_t size)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    struct msghdr msg;
+    ssize_t sent = 0;
+    int err;
+
+    l2tpv3_form_header(s);
+
+    /* entry 0: header, entry 1: caller's payload */
+    s->vec[0].iov_base = s->header_buf;
+    s->vec[0].iov_len = s->offset;
+    s->vec[1].iov_base = (void *) buf;
+    s->vec[1].iov_len = size;
+
+    msg.msg_name = s->dgram_dst;
+    msg.msg_namelen = s->dst_size;
+    msg.msg_iov = s->vec;
+    msg.msg_iovlen = 2;
+    msg.msg_control = NULL;
+    msg.msg_controllen = 0;
+    msg.msg_flags = 0;
+
+    do {
+        sent = sendmsg(s->fd, &msg, 0);
+    } while ((sent == -1) && (errno == EINTR));
+
+    if (sent > 0) {
+        sent -= s->offset;
+        return sent;
+    }
+    if (sent == 0) {
+        /* belt and braces - should not occur on DGRAM
+         * we should get an error and never a 0 send
+         */
+        sent = size;
+        return sent;
+    }
+
+    err = errno;
+    if (err == EAGAIN || err == ENOBUFS) {
+        /* signal upper layer that socket buffer is full */
+        l2tpv3_write_poll(s, true);
+        return 0;
+    }
+    sent = -err;
+    return sent;
+}
+
+/*
+ * Check the cookie (if configured) and the session id of a received
+ * packet against the expected rx values.
+ *
+ * buf points at the start of the received header; for raw IPv4 it
+ * starts with the IP header, which is skipped here.
+ *
+ * Returns 0 when the header matches, -1 otherwise. Mismatches are
+ * reported only until s->header_mismatch has been set by the caller.
+ */
+static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
+{
+
+    uint32_t *session;
+    uint64_t cookie;
+
+    if ((!s->udp) && (!s->ipv6)) {
+        buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
+    }
+
+    /* we do not do a strict check for "data" packets as per
+     * the RFC spec because the pure IP spec does not have
+     * that anyway.
+     */
+
+    if (s->cookie) {
+        if (s->cookie_is_64) {
+            cookie = ldq_be_p(buf + s->cookie_offset);
+        } else {
+            /* Cast to uint32_t first: if the 32-bit load yields a signed
+             * value, widening it directly would sign-extend cookies that
+             * have bit 31 set and they would never match rx_cookie.
+             */
+            cookie = (uint32_t) ldl_be_p(buf + s->cookie_offset);
+        }
+        if (cookie != s->rx_cookie) {
+            if (!s->header_mismatch) {
+                error_report("unknown cookie id");
+            }
+            return -1;
+        }
+    }
+    session = (uint32_t *) (buf + s->session_offset);
+    if (ldl_be_p(session) != s->rx_session) {
+        if (!s->header_mismatch) {
+            error_report("session mismatch");
+        }
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Deliver received messages from the receive vector to the peer,
+ * starting at s->queue_tail.
+ *
+ * Each message is consumed (tail advanced, depth decremented) whether
+ * it was delivered, was empty or failed header verification. The loop
+ * stops when the queue is empty, when qemu_can_send_packet() reports
+ * that nothing more can be sent, or when the last delivery returned
+ * a size that is not positive.
+ */
+static void net_l2tpv3_process_queue(NetL2TPV3State *s)
+{
+ int size = 0;
+ struct iovec *vec;
+ bool bad_read;
+ int data_size;
+ struct mmsghdr *msgvec;
+
+ /* go into ring mode only if there is a "pending" tail */
+ if (s->queue_depth > 0) {
+ do {
+ msgvec = s->msgvec + s->queue_tail;
+ if (msgvec->msg_len > 0) {
+ /* payload length = received length minus the header part */
+ data_size = msgvec->msg_len - s->header_size;
+ vec = msgvec->msg_hdr.msg_iov;
+ if ((data_size > 0) &&
+ (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
+ /* step from the header iovec to the payload iovec */
+ vec++;
+ /* Use the legacy delivery for now, we will
+ * switch to using our own ring as a queueing mechanism
+ * at a later date
+ */
+ size = qemu_send_packet_async(
+ &s->nc,
+ vec->iov_base,
+ data_size,
+ l2tpv3_send_completed
+ );
+ if (size == 0) {
+ /* stop reading; l2tpv3_send_completed() re-enables it */
+ l2tpv3_read_poll(s, false);
+ }
+ bad_read = false;
+ } else {
+ bad_read = true;
+ if (!s->header_mismatch) {
+ /* report error only once */
+ error_report("l2tpv3 header verification failed");
+ s->header_mismatch = true;
+ }
+ }
+ } else {
+ /* empty message - skip it */
+ bad_read = true;
+ }
+ s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
+ s->queue_depth--;
+ } while (
+ (s->queue_depth > 0) &&
+ qemu_can_send_packet(&s->nc) &&
+ ((size > 0) || bad_read)
+ );
+ }
+}
+
+/*
+ * Read handler: pull a batch of packets from the socket into the
+ * receive vector with recvmmsg() and then deliver what is queued.
+ */
+static void net_l2tpv3_send(void *opaque)
+{
+    NetL2TPV3State *s = opaque;
+    int want;
+    int got;
+
+    if (s->queue_depth == 0) {
+        /* Nothing pending: use the whole message vector linearly
+         * instead of treating it as a ring.
+         */
+        s->queue_head = 0;
+        s->queue_tail = 0;
+        want = MAX_L2TPV3_MSGCNT;
+    } else {
+        /* Ring mode: only the free slots can be filled ... */
+        want = MAX_L2TPV3_MSGCNT - s->queue_depth;
+
+        /* ... and a single read must not run past the end of the vector */
+        if (s->queue_head + want > MAX_L2TPV3_MSGCNT) {
+            want = MAX_L2TPV3_MSGCNT - s->queue_head;
+        }
+    }
+
+    if (want > 0) {
+        struct mmsghdr *slot = s->msgvec + s->queue_head;
+
+        do {
+            got = recvmmsg(s->fd, slot, want, MSG_DONTWAIT, NULL);
+        } while ((got == -1) && (errno == EINTR));
+
+        if (got < 0) {
+            /* Receive error: nothing was queued, but already queued
+             * packets still need to be flushed below.
+             */
+            got = 0;
+        }
+        s->queue_head = (s->queue_head + got) % MAX_L2TPV3_MSGCNT;
+        s->queue_depth += got;
+    }
+
+    net_l2tpv3_process_queue(s);
+}
+
+/*
+ * Release a receive vector created by build_l2tpv3_vector().
+ *
+ * msgvec may be NULL. count is the number of messages and iovcount
+ * the number of iovec entries per message. Entry 0 of each message is
+ * the g_malloc()ed header buffer; the remaining entries are payload
+ * buffers obtained from qemu_memalign() and are therefore released
+ * with qemu_vfree().
+ */
+static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
+{
+    int i, j;
+
+    if (!msgvec) {
+        return;
+    }
+    for (i = 0; i < count; i++) {
+        struct iovec *iov = msgvec[i].msg_hdr.msg_iov;
+
+        if (!iov) {
+            continue;
+        }
+        if (iovcount > 0) {
+            g_free(iov[0].iov_base);
+        }
+        for (j = 1; j < iovcount; j++) {
+            qemu_vfree(iov[j].iov_base);
+        }
+        g_free(iov);
+    }
+    g_free(msgvec);
+}
+
+/*
+ * Allocate a vector of count messages for recvmmsg(). Every message
+ * gets IOVSIZE iovec entries: entry 0 receives s->header_size bytes of
+ * header, entry 1 receives up to BUFFER_SIZE bytes of payload into a
+ * BUFFER_ALIGN-aligned buffer.
+ *
+ * The result is released with destroy_vector().
+ */
+static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
+{
+    int i;
+    struct iovec *iov;
+    struct mmsghdr *msgvec;
+
+    msgvec = g_malloc(sizeof(struct mmsghdr) * count);
+    for (i = 0; i < count ; i++) {
+        iov = g_malloc(sizeof(struct iovec) * IOVSIZE);
+
+        /* header part - plain heap memory */
+        iov[0].iov_base = g_malloc(s->header_size);
+        iov[0].iov_len = s->header_size;
+
+        /* payload part - aligned memory, freed with qemu_vfree() */
+        iov[1].iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
+        iov[1].iov_len = BUFFER_SIZE;
+
+        msgvec[i].msg_hdr.msg_name = NULL;
+        msgvec[i].msg_hdr.msg_namelen = 0;
+        msgvec[i].msg_hdr.msg_iov = iov;
+        msgvec[i].msg_hdr.msg_iovlen = 2;
+        msgvec[i].msg_hdr.msg_control = NULL;
+        msgvec[i].msg_hdr.msg_controllen = 0;
+        msgvec[i].msg_hdr.msg_flags = 0;
+    }
+    return msgvec;
+}
+
+/*
+ * NetClientInfo .cleanup hook: drop queued packets, stop polling,
+ * close the socket and free every buffer owned by the state.
+ */
+static void net_l2tpv3_cleanup(NetClientState *nc)
+{
+    NetL2TPV3State *state = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    qemu_purge_queued_packets(nc);
+    l2tpv3_read_poll(state, false);
+    l2tpv3_write_poll(state, false);
+
+    if (state->fd > 0) {
+        close(state->fd);
+    }
+
+    g_free(state->dgram_dst);
+    g_free(state->header_buf);
+    g_free(state->vec);
+    destroy_vector(state->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
+}
+
+/*
+ * Net client description for the l2tpv3 backend: wires the transmit
+ * hooks (.receive / .receive_iov), the poll control hook and the
+ * cleanup hook defined in this file.
+ */
+static NetClientInfo net_l2tpv3_info = {
+ .type = NET_CLIENT_OPTIONS_KIND_L2TPV3,
+ .size = sizeof(NetL2TPV3State),
+ .receive = net_l2tpv3_receive_dgram,
+ .receive_iov = net_l2tpv3_receive_dgram_iov,
+ .poll = l2tpv3_poll,
+ .cleanup = net_l2tpv3_cleanup,
+};
+
+/*
+ * Fill hints for resolving a tunnel endpoint according to the address
+ * family and encapsulation already selected in s.
+ */
+static void l2tpv3_fill_hints(const NetL2TPV3State *s, struct addrinfo *hints)
+{
+    memset(hints, 0, sizeof(*hints));
+    hints->ai_family = s->ipv6 ? AF_INET6 : AF_INET;
+    if (s->udp) {
+        hints->ai_socktype = SOCK_DGRAM;
+        hints->ai_protocol = 0;
+    } else {
+        hints->ai_socktype = SOCK_RAW;
+        hints->ai_protocol = IPPROTO_L2TP;
+    }
+}
+
+/*
+ * Create an l2tpv3 net client from the user supplied options.
+ *
+ * Validates the options, precomputes the header layout, binds a
+ * socket to the source address, resolves the destination, allocates
+ * the transmit/receive buffers and enables read polling.
+ *
+ * Returns 0 on success. On failure an error is reported, the client
+ * is deleted again and -1 is returned.
+ */
+int net_init_l2tpv3(const NetClientOptions *opts,
+                    const char *name,
+                    NetClientState *peer)
+{
+    const NetdevL2TPv3Options *l2tpv3;
+    NetL2TPV3State *s;
+    NetClientState *nc;
+    int fd = -1, gairet;
+    struct addrinfo hints;
+    struct addrinfo *result = NULL;
+    char *srcport, *dstport;
+
+    nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
+
+    s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    s->queue_head = 0;
+    s->queue_tail = 0;
+    s->header_mismatch = false;
+
+    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3);
+    l2tpv3 = opts->l2tpv3;
+
+    s->ipv6 = l2tpv3->has_ipv6 && l2tpv3->ipv6;
+
+    if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
+        error_report("l2tpv3_open : offset must be less than 256 bytes");
+        goto outerr;
+    }
+
+    /* cookies are only usable when both directions are configured */
+    if (l2tpv3->has_rxcookie != l2tpv3->has_txcookie) {
+        error_report(
+            "l2tpv3_open : need both rxcookie and txcookie, or neither");
+        goto outerr;
+    }
+    s->cookie = l2tpv3->has_rxcookie;
+
+    /* cookie64 is optional: its value is only meaningful when present */
+    s->cookie_is_64 = l2tpv3->has_cookie64 && l2tpv3->cookie64;
+
+    if (l2tpv3->has_udp && l2tpv3->udp) {
+        s->udp = true;
+        if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
+            error_report("l2tpv3_open : need both src and dst port for udp");
+            goto outerr;
+        } else {
+            srcport = l2tpv3->srcport;
+            dstport = l2tpv3->dstport;
+        }
+    } else {
+        s->udp = false;
+        srcport = NULL;
+        dstport = NULL;
+    }
+
+    /* base layout: 4 byte session id, followed by cookie and counter */
+    s->offset = 4;
+    s->session_offset = 0;
+    s->cookie_offset = 4;
+    s->counter_offset = 4;
+
+    s->tx_session = l2tpv3->txsession;
+    if (l2tpv3->has_rxsession) {
+        s->rx_session = l2tpv3->rxsession;
+    } else {
+        s->rx_session = s->tx_session;
+    }
+
+    if (s->cookie) {
+        s->rx_cookie = l2tpv3->rxcookie;
+        s->tx_cookie = l2tpv3->txcookie;
+        if (s->cookie_is_64) {
+            /* 64 bit cookie */
+            s->offset += 8;
+            s->counter_offset += 8;
+        } else {
+            /* 32 bit cookie */
+            s->offset += 4;
+            s->counter_offset += 4;
+        }
+    }
+
+    if (s->udp) {
+        /* UDP encapsulation puts a 4 byte word in front of the session */
+        s->offset += 4;
+        s->counter_offset += 4;
+        s->session_offset += 4;
+        s->cookie_offset += 4;
+    }
+
+    l2tpv3_fill_hints(s, &hints);
+    gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
+
+    if ((gairet != 0) || (result == NULL)) {
+        error_report(
+            "l2tpv3_open : could not resolve src, errno = %s",
+            gai_strerror(gairet)
+        );
+        goto outerr;
+    }
+    fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
+    if (fd == -1) {
+        error_report("l2tpv3_open : socket creation failed, errno = %d",
+                     errno);
+        /* result is released exactly once, at outerr */
+        goto outerr;
+    }
+    if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
+        error_report("l2tpv3_open : could not bind socket err=%i", errno);
+        goto outerr;
+    }
+    freeaddrinfo(result);
+    result = NULL;
+
+    l2tpv3_fill_hints(s, &hints);
+    gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
+    if ((gairet != 0) || (result == NULL)) {
+        error_report(
+            "l2tpv3_open : could not resolve dst, error = %s",
+            gai_strerror(gairet)
+        );
+        goto outerr;
+    }
+
+    s->dgram_dst = g_malloc(sizeof(struct sockaddr_storage));
+    memset(s->dgram_dst, '\0' , sizeof(struct sockaddr_storage));
+    memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
+    s->dst_size = result->ai_addrlen;
+
+    freeaddrinfo(result);
+    result = NULL;
+
+    /* A pinned counter implies that there is a counter field, so it
+     * must be accounted for in the header length as well. Otherwise
+     * l2tpv3_form_header() would write the counter past the end of
+     * the transmitted header.
+     */
+    s->pin_counter = l2tpv3->has_pincounter && l2tpv3->pincounter;
+    s->has_counter = s->pin_counter ||
+                     (l2tpv3->has_counter && l2tpv3->counter);
+    if (s->has_counter) {
+        s->offset += 4;
+    }
+
+    if (l2tpv3->has_offset) {
+        /* extra offset */
+        s->offset += l2tpv3->offset;
+    }
+
+    /* raw IPv4 sockets deliver the IP header in front of the packet */
+    if ((s->ipv6) || (s->udp)) {
+        s->header_size = s->offset;
+    } else {
+        s->header_size = s->offset + sizeof(struct iphdr);
+    }
+
+    s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
+    s->vec = g_malloc(sizeof(struct iovec) * MAX_L2TPV3_IOVCNT);
+    s->header_buf = g_malloc(s->header_size);
+
+    qemu_set_nonblock(fd);
+
+    s->fd = fd;
+    s->counter = 0;
+
+    l2tpv3_read_poll(s, true);
+
+    snprintf(s->nc.info_str, sizeof(s->nc.info_str),
+             "l2tpv3: connected");
+    return 0;
+outerr:
+    qemu_del_net_client(nc);
+    if (fd >= 0) {
+        close(fd);
+    }
+    if (result) {
+        freeaddrinfo(result);
+    }
+    return -1;
+}
+
diff --git a/net/net.c b/net/net.c
index 3dac29b844..e636071f0b 100644
--- a/net/net.c
+++ b/net/net.c
@@ -806,6 +806,9 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
#ifdef CONFIG_VHOST_NET_USED
[NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
#endif
+#ifdef CONFIG_LINUX
+ [NET_CLIENT_OPTIONS_KIND_L2TPV3] = net_init_l2tpv3,
+#endif
};
@@ -842,6 +845,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
#ifdef CONFIG_VHOST_NET_USED
case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
#endif
+#ifdef CONFIG_LINUX
+ case NET_CLIENT_OPTIONS_KIND_L2TPV3:
+#endif
break;
default:
diff --git a/qapi-schema.json b/qapi-schema.json
index e7727a1153..0000372deb 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2040,6 +2040,62 @@
'*udp': 'str' } }
##
+# @NetdevL2TPv3Options
+#
+# Connect the VLAN to Ethernet over L2TPv3 Static tunnel
+#
+# @src: source address
+#
+# @dst: destination address
+#
+# @srcport: #optional source port - mandatory for udp, optional for ip
+#
+# @dstport: #optional destination port - mandatory for udp, optional for ip
+#
+# @ipv6: #optional - force the use of ipv6
+#
+# @udp: #optional - use the udp version of l2tpv3 encapsulation
+#
+# @cookie64: #optional - use 64 bit cookies
+#
+# @counter: #optional have sequence counter
+#
+# @pincounter: #optional pin sequence counter to zero -
+# workaround for buggy implementations or
+# networks with packet reorder
+#
+# @txcookie: #optional 32 or 64 bit transmit cookie
+#
+# @rxcookie: #optional 32 or 64 bit receive cookie
+#
+# @txsession: 32 bit transmit session
+#
+# @rxsession: #optional 32 bit receive session - if not specified
+# set to the same value as transmit
+#
+# @offset: #optional additional offset - allows the insertion of
+# additional application-specific data before the packet payload
+#
+# Since 2.1
+##
+{ 'type': 'NetdevL2TPv3Options',
+ 'data': {
+ 'src': 'str',
+ 'dst': 'str',
+ '*srcport': 'str',
+ '*dstport': 'str',
+ '*ipv6': 'bool',
+ '*udp': 'bool',
+ '*cookie64': 'bool',
+ '*counter': 'bool',
+ '*pincounter': 'bool',
+ '*txcookie': 'uint64',
+ '*rxcookie': 'uint64',
+ 'txsession': 'uint32',
+ '*rxsession': 'uint32',
+ '*offset': 'uint32' } }
+
+##
# @NetdevVdeOptions
#
# Connect the VLAN to a vde switch running on the host.
@@ -2150,6 +2206,9 @@
# A discriminated record of network device traits.
#
# Since 1.2
+#
+# 'l2tpv3' - since 2.1
+#
##
{ 'union': 'NetClientOptions',
'data': {
@@ -2157,6 +2216,7 @@
'nic': 'NetLegacyNicOptions',
'user': 'NetdevUserOptions',
'tap': 'NetdevTapOptions',
+ 'l2tpv3': 'NetdevL2TPv3Options',
'socket': 'NetdevSocketOptions',
'vde': 'NetdevVdeOptions',
'dump': 'NetdevDumpOptions',
diff --git a/qemu-options.hx b/qemu-options.hx
index ff76ad4830..9e5468678b 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1433,6 +1433,29 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
" (default=" DEFAULT_BRIDGE_INTERFACE ") using the program 'helper'\n"
" (default=" DEFAULT_BRIDGE_HELPER ")\n"
#endif
+#ifdef __linux__
+ "-net l2tpv3[,vlan=n][,name=str],src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport],txsession=txsession[,rxsession=rxsession][,ipv6=on/off][,udp=on/off][,cookie64=on/off][,counter][,pincounter][,txcookie=txcookie][,rxcookie=rxcookie][,offset=offset]\n"
+ " connect the VLAN to an Ethernet over L2TPv3 pseudowire\n"
+ " Linux kernel 3.3+ as well as most routers can talk\n"
+ " L2TPv3. This transport allows to connect a VM to a VM,\n"
+ " VM to a router and even VM to Host. It is a nearly-universal\n"
+ " standard (RFC3931). Note - this implementation uses static\n"
+ " pre-configured tunnels (same as the Linux kernel).\n"
+ " use 'src=' to specify source address\n"
+ " use 'dst=' to specify destination address\n"
+ " use 'udp=on' to specify udp encapsulation\n"
+ " use 'srcport=' to specify source udp port\n"
+ " use 'dstport=' to specify destination udp port\n"
+ " use 'ipv6=on' to force v6\n"
+ " L2TPv3 uses cookies to prevent misconfiguration as\n"
+ " well as a weak security measure\n"
+ " use 'rxcookie=0x012345678' to specify a rxcookie\n"
+ " use 'txcookie=0x012345678' to specify a txcookie\n"
+ " use 'cookie64=on' to set cookie size to 64 bit, otherwise 32\n"
+ " use 'counter=off' to force a 'cut-down' L2TPv3 with no counter\n"
+ " use 'pincounter=on' to work around broken counter handling in peer\n"
+ " use 'offset=X' to add an extra offset between header and data\n"
+#endif
"-net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n"
" connect the vlan 'n' to another VLAN using a socket connection\n"
"-net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port[,localaddr=addr]]\n"
@@ -1778,6 +1801,65 @@ qemu-system-i386 linux.img \
-net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4
@end example
+@item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
+@item -net l2tpv3[,vlan=@var{n}][,name=@var{name}],src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
+Connect VLAN @var{n} to L2TPv3 pseudowire. L2TPv3 (RFC3931) is a popular
+protocol to transport Ethernet (and other Layer 2) data frames between
+two systems. It is present in routers, firewalls and the Linux kernel
+(from version 3.3 onwards).
+
+This transport allows a VM to communicate to another VM, router or firewall directly.
+
+@item src=@var{srcaddr}
+ source address (mandatory)
+@item dst=@var{dstaddr}
+ destination address (mandatory)
+@item udp
+ select udp encapsulation (default is ip).
+@item srcport=@var{srcport}
+ source udp port.
+@item dstport=@var{dstport}
+ destination udp port.
+@item ipv6
+ force v6, otherwise defaults to v4.
+@item rxcookie=@var{rxcookie}
+@item txcookie=@var{txcookie}
+ Cookies are a weak form of security in the l2tpv3 specification.
+Their function is mostly to prevent misconfiguration. By default they are 32
+bit.
+@item cookie64
+ Set cookie size to 64 bit instead of the default 32
+@item counter=off
+ Force a 'cut-down' L2TPv3 with no counter as in
+draft-mkonstan-l2tpext-keyed-ipv6-tunnel-00
+@item pincounter=on
+ Work around broken counter handling in peer. This may also help on
+networks which have packet reorder.
+@item offset=@var{offset}
+ Add an extra offset between header and data
+
+For example, to attach a VM running on host 4.3.2.1 via L2TPv3 to the bridge br-lan
+on the remote Linux host 1.2.3.4:
+@example
+# Setup tunnel on linux host using udp as encapsulation
+# on 1.2.3.4
+ip l2tp add tunnel remote 4.3.2.1 local 1.2.3.4 tunnel_id 1 peer_tunnel_id 1 \
+ encap udp udp_sport 16384 udp_dport 16384
+ip l2tp add session tunnel_id 1 name vmtunnel0 session_id \
+ 0xFFFFFFFF peer_session_id 0xFFFFFFFF
+ifconfig vmtunnel0 mtu 1500
+ifconfig vmtunnel0 up
+brctl addif br-lan vmtunnel0
+
+
+# on 4.3.2.1
+# launch QEMU instance - if your network has reorder or is very lossy add ,pincounter
+
+qemu-system-i386 linux.img -net nic -net l2tpv3,src=4.3.2.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter
+
+
+@end example
+
@item -netdev vde,id=@var{id}[,sock=@var{socketpath}][,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
@item -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and