From 52267790ef52d7513879238ca9fac22c1733e0e3 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Thu, 3 Aug 2017 16:29:39 -0400
Subject: sock: add MSG_ZEROCOPY

The kernel supports zerocopy sendmsg in virtio and tap. Expand the
infrastructure to support other socket types. Introduce a completion
notification channel over the socket error queue. Notifications are
returned with ee_origin SO_EE_ORIGIN_ZEROCOPY. ee_errno is 0 to avoid
blocking the send/recv path on receiving notifications.

Add reference counting, to support the skb split, merge, resize and
clone operations possible with SOCK_STREAM and other socket types.

The patch does not yet modify any datapaths.

Signed-off-by: Willem de Bruijn
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'include/linux/skbuff.h')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2f64e2bbb592..59cff7aa494e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -429,6 +429,7 @@ enum {
 	SKBTX_SCHED_TSTAMP = 1 << 6,
 };
 
+#define SKBTX_ZEROCOPY_FRAG	(SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG)
 #define SKBTX_ANY_SW_TSTAMP	(SKBTX_SW_TSTAMP    | \
 				 SKBTX_SCHED_TSTAMP)
 #define SKBTX_ANY_TSTAMP	(SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
@@ -445,8 +446,28 @@ struct ubuf_info {
 	void (*callback)(struct ubuf_info *, bool zerocopy_success);
 	void *ctx;
 	unsigned long desc;
+	u16 zerocopy:1;		/* ANDed with the flag passed to skb_zcopy_clear() */
+	atomic_t refcnt;	/* incremented by sock_zerocopy_get() */
 };
 
+#define skb_uarg(SKB)	((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+
+struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
+
+static inline void sock_zerocopy_get(struct ubuf_info *uarg)
+{
+	atomic_inc(&uarg->refcnt);	/* take one more reference on uarg */
+}
+
+void sock_zerocopy_put(struct ubuf_info *uarg);
+void sock_zerocopy_put_abort(struct ubuf_info *uarg);
+
+void sock_zerocopy_callback(struct ubuf_info *uarg, bool success);
+
+int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
+			     struct msghdr *msg, int len,
+			     struct ubuf_info *uarg);
+
 /* This data is invariant across clones and lives at
  * the end of the header data, ie. at skb->end.
  */
@@ -1214,6 +1235,45 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
 	return &skb_shinfo(skb)->hwtstamps;
 }
 
+static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
+{
+	bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY;
+
+	return is_zcopy ? skb_uarg(skb) : NULL;	/* NULL for a NULL skb or one without SKBTX_DEV_ZEROCOPY */
+}
+
+static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
+{
+	if (skb && uarg && !skb_zcopy(skb)) {	/* no-op if skb already has SKBTX_DEV_ZEROCOPY set */
+		sock_zerocopy_get(uarg);	/* reference is dropped again in skb_zcopy_clear()/skb_zcopy_abort() */
+		skb_shinfo(skb)->destructor_arg = uarg;
+		skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
+	}
+}
+
+/* Release a reference on a zerocopy structure; a false @zerocopy clears uarg->zerocopy */
+static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
+{
+	struct ubuf_info *uarg = skb_zcopy(skb);
+
+	if (uarg) {
+		uarg->zerocopy = uarg->zerocopy && zerocopy;
+		sock_zerocopy_put(uarg);
+		skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;	/* skb_zcopy() returns NULL for this skb from now on */
+	}
+}
+
+/* Abort a zerocopy operation and revert zckey on error in send syscall */
+static inline void skb_zcopy_abort(struct sk_buff *skb)
+{
+	struct ubuf_info *uarg = skb_zcopy(skb);
+
+	if (uarg) {
+		sock_zerocopy_put_abort(uarg);
+		skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;	/* same flag teardown as skb_zcopy_clear() */
+	}
+}
+
 /**
  *	skb_queue_empty - check if a queue is empty
  *	@list: queue head
-- 
cgit v1.2.3-55-g7522