From 02c558b2d5d679fbbcaa5b9689484c7e0f8abb7b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Oct 2018 11:08:04 -0700 Subject: bpf: sockmap, support for msg_peek in sk_msg with redirect ingress This adds support for the MSG_PEEK flag when doing redirect to ingress and receiving on the sk_msg psock queue. Previously the flag was being ignored which could confuse applications if they expected the flag to work as normal. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- net/ipv4/tcp_bpf.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'net/ipv4/tcp_bpf.c') diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index f9d3cf185827..b7918d4caa30 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -39,17 +39,19 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, } int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, - struct msghdr *msg, int len) + struct msghdr *msg, int len, int flags) { struct iov_iter *iter = &msg->msg_iter; + int peek = flags & MSG_PEEK; int i, ret, copied = 0; + struct sk_msg *msg_rx; + + msg_rx = list_first_entry_or_null(&psock->ingress_msg, + struct sk_msg, list); while (copied != len) { struct scatterlist *sge; - struct sk_msg *msg_rx; - msg_rx = list_first_entry_or_null(&psock->ingress_msg, - struct sk_msg, list); if (unlikely(!msg_rx)) break; @@ -70,22 +72,30 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, } copied += copy; - sge->offset += copy; - sge->length -= copy; - sk_mem_uncharge(sk, copy); - msg_rx->sg.size -= copy; - if (!sge->length) { - i++; - if (i == MAX_SKB_FRAGS) - i = 0; - if (!msg_rx->skb) - put_page(page); + if (likely(!peek)) { + sge->offset += copy; + sge->length -= copy; + sk_mem_uncharge(sk, copy); + msg_rx->sg.size -= copy; + + if (!sge->length) { + sk_msg_iter_var_next(i); + if (!msg_rx->skb) + put_page(page); + } + } else { + sk_msg_iter_var_next(i); } if (copied == len) break; } while (i != msg_rx->sg.end); + if (unlikely(peek)) { + msg_rx = list_next_entry(msg_rx, list); + continue; + } + msg_rx->sg.start = i; if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { list_del(&msg_rx->list); @@ -93,6 +103,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, consume_skb(msg_rx->skb); kfree(msg_rx); } + msg_rx = list_first_entry_or_null(&psock->ingress_msg, + struct sk_msg, list); } return copied; @@ -115,7 +127,7 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); lock_sock(sk); msg_bytes_ready: - copied = __tcp_bpf_recvmsg(sk, psock, msg, len); + copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); if (!copied) { int data, err = 0; long timeo; -- cgit v1.2.3-55-g7522