diff options
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r-- | include/linux/skbuff.h | 201 |
1 files changed, 158 insertions, 43 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d67a8182e5eb..f751f3b93039 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -22,6 +22,7 @@ #include <linux/cache.h> #include <linux/rbtree.h> #include <linux/socket.h> +#include <linux/refcount.h> #include <linux/atomic.h> #include <asm/types.h> @@ -345,6 +346,42 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) frag->size -= delta; } +static inline bool skb_frag_must_loop(struct page *p) +{ +#if defined(CONFIG_HIGHMEM) + if (PageHighMem(p)) + return true; +#endif + return false; +} + +/** + * skb_frag_foreach_page - loop over pages in a fragment + * + * @f: skb frag to operate on + * @f_off: offset from start of f->page.p + * @f_len: length from f_off to loop over + * @p: (temp var) current page + * @p_off: (temp var) offset from start of current page, + * non-zero only on first page. + * @p_len: (temp var) length in current page, + * < PAGE_SIZE only on first and last page. + * @copied: (temp var) length so far, excluding current p_len. + * + * A fragment can hold a compound page, in which case per-page + * operations, notably kmap_atomic, must be called for each + * regular page. + */ +#define skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied) \ + for (p = skb_frag_page(f) + ((f_off) >> PAGE_SHIFT), \ + p_off = (f_off) & (PAGE_SIZE - 1), \ + p_len = skb_frag_must_loop(p) ? \ + min_t(u32, f_len, PAGE_SIZE - p_off) : f_len, \ + copied = 0; \ + copied < f_len; \ + copied += p_len, p++, p_off = 0, \ + p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \ + #define HAVE_HW_TIME_STAMP /** @@ -393,6 +430,7 @@ enum { SKBTX_SCHED_TSTAMP = 1 << 6, }; +#define SKBTX_ZEROCOPY_FRAG (SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG) #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) @@ -407,10 +445,46 @@ enum { */ struct ubuf_info { void (*callback)(struct ubuf_info *, bool zerocopy_success); - void *ctx; - unsigned long desc; + union { + struct { + unsigned long desc; + void *ctx; + }; + struct { + u32 id; + u16 len; + u16 zerocopy:1; + u32 bytelen; + }; + }; + refcount_t refcnt; + + struct mmpin { + struct user_struct *user; + unsigned int num_pg; + } mmp; }; +#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) + +struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size); +struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, + struct ubuf_info *uarg); + +static inline void sock_zerocopy_get(struct ubuf_info *uarg) +{ + refcount_inc(&uarg->refcnt); +} + +void sock_zerocopy_put(struct ubuf_info *uarg); +void sock_zerocopy_put_abort(struct ubuf_info *uarg); + +void sock_zerocopy_callback(struct ubuf_info *uarg, bool success); + +int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, + struct msghdr *msg, int len, + struct ubuf_info *uarg); + /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -463,39 +537,38 @@ enum { enum { SKB_GSO_TCPV4 = 1 << 0, - SKB_GSO_UDP = 1 << 1, /* This indicates the skb is from an untrusted source. */ - SKB_GSO_DODGY = 1 << 2, + SKB_GSO_DODGY = 1 << 1, /* This indicates the tcp segment has CWR set. */ - SKB_GSO_TCP_ECN = 1 << 3, + SKB_GSO_TCP_ECN = 1 << 2, - SKB_GSO_TCP_FIXEDID = 1 << 4, + SKB_GSO_TCP_FIXEDID = 1 << 3, - SKB_GSO_TCPV6 = 1 << 5, + SKB_GSO_TCPV6 = 1 << 4, - SKB_GSO_FCOE = 1 << 6, + SKB_GSO_FCOE = 1 << 5, - SKB_GSO_GRE = 1 << 7, + SKB_GSO_GRE = 1 << 6, - SKB_GSO_GRE_CSUM = 1 << 8, + SKB_GSO_GRE_CSUM = 1 << 7, - SKB_GSO_IPXIP4 = 1 << 9, + SKB_GSO_IPXIP4 = 1 << 8, - SKB_GSO_IPXIP6 = 1 << 10, + SKB_GSO_IPXIP6 = 1 << 9, - SKB_GSO_UDP_TUNNEL = 1 << 11, + SKB_GSO_UDP_TUNNEL = 1 << 10, - SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12, + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, - SKB_GSO_PARTIAL = 1 << 13, + SKB_GSO_PARTIAL = 1 << 12, - SKB_GSO_TUNNEL_REMCSUM = 1 << 14, + SKB_GSO_TUNNEL_REMCSUM = 1 << 13, - SKB_GSO_SCTP = 1 << 15, + SKB_GSO_SCTP = 1 << 14, - SKB_GSO_ESP = 1 << 16, + SKB_GSO_ESP = 1 << 15, }; #if BITS_PER_LONG > 32 @@ -945,12 +1018,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } -struct sk_buff *__alloc_skb_head(gfp_t priority, int node); -static inline struct sk_buff *alloc_skb_head(gfp_t priority) -{ - return __alloc_skb_head(priority, -1); -} - struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); @@ -1145,8 +1212,6 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->hash; } -__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6); - static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6) { if (!skb->l4_hash && !skb->sw_hash) { @@ -1159,20 +1224,6 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 return skb->hash; } -__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl); - -static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4) -{ - if (!skb->l4_hash && !skb->sw_hash) { - struct flow_keys keys; - __u32 hash = __get_hash_from_flowi4(fl4, &keys); - - __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); - } - - return skb->hash; -} - __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) @@ -1217,6 +1268,50 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) return &skb_shinfo(skb)->hwtstamps; } +static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) +{ + bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY; + + return is_zcopy ? skb_uarg(skb) : NULL; +} + +static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg) +{ + if (skb && uarg && !skb_zcopy(skb)) { + sock_zerocopy_get(uarg); + skb_shinfo(skb)->destructor_arg = uarg; + skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; + } +} + +/* Release a reference on a zerocopy structure */ +static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) +{ + struct ubuf_info *uarg = skb_zcopy(skb); + + if (uarg) { + if (uarg->callback == sock_zerocopy_callback) { + uarg->zerocopy = uarg->zerocopy && zerocopy; + sock_zerocopy_put(uarg); + } else { + uarg->callback(uarg, zerocopy); + } + + skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; + } +} + +/* Abort a zerocopy operation and revert zckey on error in send syscall */ +static inline void skb_zcopy_abort(struct sk_buff *skb) +{ + struct ubuf_info *uarg = skb_zcopy(skb); + + if (uarg) { + sock_zerocopy_put_abort(uarg); + skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; + } +} + /** * skb_queue_empty - check if a queue is empty * @list: queue head @@ -1799,13 +1894,18 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb) return skb->len - skb->data_len; } -static inline unsigned int skb_pagelen(const struct sk_buff *skb) +static inline unsigned int __skb_pagelen(const struct sk_buff *skb) { unsigned int i, len = 0; for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--) len += skb_frag_size(&skb_shinfo(skb)->frags[i]); - return len + skb_headlen(skb); + return len; +} + +static inline unsigned int skb_pagelen(const struct sk_buff *skb) +{ + return skb_headlen(skb) + __skb_pagelen(skb); } /** @@ -2450,7 +2550,17 @@ static inline void skb_orphan(struct sk_buff *skb) */ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) { - if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY))) + if (likely(!skb_zcopy(skb))) + return 0; + if (skb_uarg(skb)->callback == sock_zerocopy_callback) + return 0; + return skb_copy_ubufs(skb, gfp_mask); +} + +/* Frags must be orphaned, even if refcounted, if skb might loop to rx path */ +static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask) +{ + if (likely(!skb_zcopy(skb))) return 0; return skb_copy_ubufs(skb, gfp_mask); } @@ -2899,6 +3009,8 @@ static inline int skb_add_data(struct sk_buff *skb, static inline bool skb_can_coalesce(struct sk_buff *skb, int i, const struct page *page, int off) { + if (skb_zcopy(skb)) + return false; if (i) { const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; @@ -3153,6 +3265,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); +int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, + int len); +int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, |