diff options
author | Linus Torvalds | 2017-09-06 23:45:08 +0200 |
---|---|---|
committer | Linus Torvalds | 2017-09-06 23:45:08 +0200 |
commit | aae3dbb4776e7916b6cd442d00159bea27a695c1 (patch) | |
tree | d074c5d783a81e7e2e084b1eba77f57459da7e37 /include/linux/skbuff.h | |
parent | Merge tag 'wberr-v4.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/jl... (diff) | |
parent | Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirshe... (diff) | |
download | kernel-qcow2-linux-aae3dbb4776e7916b6cd442d00159bea27a695c1.tar.gz kernel-qcow2-linux-aae3dbb4776e7916b6cd442d00159bea27a695c1.tar.xz kernel-qcow2-linux-aae3dbb4776e7916b6cd442d00159bea27a695c1.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Support ipv6 checksum offload in sunvnet driver, from Shannon
Nelson.
2) Move to RB-tree instead of custom AVL code in inetpeer, from Eric
Dumazet.
3) Allow generic XDP to work on virtual devices, from John Fastabend.
4) Add bpf device maps and XDP_REDIRECT, which can be used to build
arbitrary switching frameworks using XDP. From John Fastabend.
5) Remove UFO offloads from the tree, gave us little other than bugs.
6) Remove the IPSEC flow cache, from Florian Westphal.
7) Support ipv6 route offload in mlxsw driver.
8) Support VF representors in bnxt_en, from Sathya Perla.
9) Add support for forward error correction modes to ethtool, from
Vidya Sagar Ravipati.
10) Add time filter for packet scheduler action dumping, from Jamal Hadi
Salim.
11) Extend the zerocopy sendmsg() used by virtio and tap to regular
sockets via MSG_ZEROCOPY. From Willem de Bruijn.
12) Significantly rework value tracking in the BPF verifier, from Edward
Cree.
13) Add new jump instructions to eBPF, from Daniel Borkmann.
14) Rework rtnetlink plumbing so that operations can be run without
taking the RTNL semaphore. From Florian Westphal.
15) Support XDP in tap driver, from Jason Wang.
16) Add 32-bit eBPF JIT for ARM, from Shubham Bansal.
17) Add Huawei hinic ethernet driver.
18) Allow to report MD5 keys in TCP inet_diag dumps, from Ivan
Delalande.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1780 commits)
i40e: point wb_desc at the nvm_wb_desc during i40e_read_nvm_aq
i40e: avoid NVM acquire deadlock during NVM update
drivers: net: xgene: Remove return statement from void function
drivers: net: xgene: Configure tx/rx delay for ACPI
drivers: net: xgene: Read tx/rx delay for ACPI
rocker: fix kcalloc parameter order
rds: Fix non-atomic operation on shared flag variable
net: sched: don't use GFP_KERNEL under spin lock
vhost_net: correctly check tx avail during rx busy polling
net: mdio-mux: add mdio_mux parameter to mdio_mux_init()
rxrpc: Make service connection lookup always check for retry
net: stmmac: Delete dead code for MDIO registration
gianfar: Fix Tx flow control deactivation
cxgb4: Ignore MPS_TX_INT_CAUSE[Bubble] for T6
cxgb4: Fix pause frame count in t4_get_port_stats
cxgb4: fix memory leak
tun: rename generic_xdp to skb_xdp
tun: reserve extra headroom only when XDP is set
net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping
net: dsa: bcm_sf2: Advertise number of egress queues
...
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r-- | include/linux/skbuff.h | 201 |
1 files changed, 158 insertions, 43 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d67a8182e5eb..f751f3b93039 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -22,6 +22,7 @@ #include <linux/cache.h> #include <linux/rbtree.h> #include <linux/socket.h> +#include <linux/refcount.h> #include <linux/atomic.h> #include <asm/types.h> @@ -345,6 +346,42 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) frag->size -= delta; } +static inline bool skb_frag_must_loop(struct page *p) +{ +#if defined(CONFIG_HIGHMEM) + if (PageHighMem(p)) + return true; +#endif + return false; +} + +/** + * skb_frag_foreach_page - loop over pages in a fragment + * + * @f: skb frag to operate on + * @f_off: offset from start of f->page.p + * @f_len: length from f_off to loop over + * @p: (temp var) current page + * @p_off: (temp var) offset from start of current page, + * non-zero only on first page. + * @p_len: (temp var) length in current page, + * < PAGE_SIZE only on first and last page. + * @copied: (temp var) length so far, excluding current p_len. + * + * A fragment can hold a compound page, in which case per-page + * operations, notably kmap_atomic, must be called for each + * regular page. + */ +#define skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied) \ + for (p = skb_frag_page(f) + ((f_off) >> PAGE_SHIFT), \ + p_off = (f_off) & (PAGE_SIZE - 1), \ + p_len = skb_frag_must_loop(p) ? \ + min_t(u32, f_len, PAGE_SIZE - p_off) : f_len, \ + copied = 0; \ + copied < f_len; \ + copied += p_len, p++, p_off = 0, \ + p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \ + #define HAVE_HW_TIME_STAMP /** @@ -393,6 +430,7 @@ enum { SKBTX_SCHED_TSTAMP = 1 << 6, }; +#define SKBTX_ZEROCOPY_FRAG (SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG) #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) @@ -407,10 +445,46 @@ enum { */ struct ubuf_info { void (*callback)(struct ubuf_info *, bool zerocopy_success); - void *ctx; - unsigned long desc; + union { + struct { + unsigned long desc; + void *ctx; + }; + struct { + u32 id; + u16 len; + u16 zerocopy:1; + u32 bytelen; + }; + }; + refcount_t refcnt; + + struct mmpin { + struct user_struct *user; + unsigned int num_pg; + } mmp; }; +#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) + +struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size); +struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, + struct ubuf_info *uarg); + +static inline void sock_zerocopy_get(struct ubuf_info *uarg) +{ + refcount_inc(&uarg->refcnt); +} + +void sock_zerocopy_put(struct ubuf_info *uarg); +void sock_zerocopy_put_abort(struct ubuf_info *uarg); + +void sock_zerocopy_callback(struct ubuf_info *uarg, bool success); + +int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, + struct msghdr *msg, int len, + struct ubuf_info *uarg); + /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -463,39 +537,38 @@ enum { enum { SKB_GSO_TCPV4 = 1 << 0, - SKB_GSO_UDP = 1 << 1, /* This indicates the skb is from an untrusted source. */ - SKB_GSO_DODGY = 1 << 2, + SKB_GSO_DODGY = 1 << 1, /* This indicates the tcp segment has CWR set. */ - SKB_GSO_TCP_ECN = 1 << 3, + SKB_GSO_TCP_ECN = 1 << 2, - SKB_GSO_TCP_FIXEDID = 1 << 4, + SKB_GSO_TCP_FIXEDID = 1 << 3, - SKB_GSO_TCPV6 = 1 << 5, + SKB_GSO_TCPV6 = 1 << 4, - SKB_GSO_FCOE = 1 << 6, + SKB_GSO_FCOE = 1 << 5, - SKB_GSO_GRE = 1 << 7, + SKB_GSO_GRE = 1 << 6, - SKB_GSO_GRE_CSUM = 1 << 8, + SKB_GSO_GRE_CSUM = 1 << 7, - SKB_GSO_IPXIP4 = 1 << 9, + SKB_GSO_IPXIP4 = 1 << 8, - SKB_GSO_IPXIP6 = 1 << 10, + SKB_GSO_IPXIP6 = 1 << 9, - SKB_GSO_UDP_TUNNEL = 1 << 11, + SKB_GSO_UDP_TUNNEL = 1 << 10, - SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12, + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, - SKB_GSO_PARTIAL = 1 << 13, + SKB_GSO_PARTIAL = 1 << 12, - SKB_GSO_TUNNEL_REMCSUM = 1 << 14, + SKB_GSO_TUNNEL_REMCSUM = 1 << 13, - SKB_GSO_SCTP = 1 << 15, + SKB_GSO_SCTP = 1 << 14, - SKB_GSO_ESP = 1 << 16, + SKB_GSO_ESP = 1 << 15, }; #if BITS_PER_LONG > 32 @@ -945,12 +1018,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } -struct sk_buff *__alloc_skb_head(gfp_t priority, int node); -static inline struct sk_buff *alloc_skb_head(gfp_t priority) -{ - return __alloc_skb_head(priority, -1); -} - struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); @@ -1145,8 +1212,6 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->hash; } -__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6); - static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6) { if (!skb->l4_hash && !skb->sw_hash) { @@ -1159,20 +1224,6 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 return skb->hash; } -__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl); - -static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4) -{ - if (!skb->l4_hash && !skb->sw_hash) { - struct flow_keys keys; - __u32 hash = __get_hash_from_flowi4(fl4, &keys); - - __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); - } - - return skb->hash; -} - __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) @@ -1217,6 +1268,50 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) return &skb_shinfo(skb)->hwtstamps; } +static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) +{ + bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY; + + return is_zcopy ? skb_uarg(skb) : NULL; +} + +static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg) +{ + if (skb && uarg && !skb_zcopy(skb)) { + sock_zerocopy_get(uarg); + skb_shinfo(skb)->destructor_arg = uarg; + skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; + } +} + +/* Release a reference on a zerocopy structure */ +static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) +{ + struct ubuf_info *uarg = skb_zcopy(skb); + + if (uarg) { + if (uarg->callback == sock_zerocopy_callback) { + uarg->zerocopy = uarg->zerocopy && zerocopy; + sock_zerocopy_put(uarg); + } else { + uarg->callback(uarg, zerocopy); + } + + skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; + } +} + +/* Abort a zerocopy operation and revert zckey on error in send syscall */ +static inline void skb_zcopy_abort(struct sk_buff *skb) +{ + struct ubuf_info *uarg = skb_zcopy(skb); + + if (uarg) { + sock_zerocopy_put_abort(uarg); + skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; + } +} + /** * skb_queue_empty - check if a queue is empty * @list: queue head @@ -1799,13 +1894,18 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb) return skb->len - skb->data_len; } -static inline unsigned int skb_pagelen(const struct sk_buff *skb) +static inline unsigned int __skb_pagelen(const struct sk_buff *skb) { unsigned int i, len = 0; for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--) len += skb_frag_size(&skb_shinfo(skb)->frags[i]); - return len + skb_headlen(skb); + return len; +} + +static inline unsigned int skb_pagelen(const struct sk_buff *skb) +{ + return skb_headlen(skb) + __skb_pagelen(skb); } /** @@ -2450,7 +2550,17 @@ static inline void skb_orphan(struct sk_buff *skb) */ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) { - if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY))) + if (likely(!skb_zcopy(skb))) + return 0; + if (skb_uarg(skb)->callback == sock_zerocopy_callback) + return 0; + return skb_copy_ubufs(skb, gfp_mask); +} + +/* Frags must be orphaned, even if refcounted, if skb might loop to rx path */ +static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask) +{ + if (likely(!skb_zcopy(skb))) return 0; return skb_copy_ubufs(skb, gfp_mask); } @@ -2899,6 +3009,8 @@ static inline int skb_add_data(struct sk_buff *skb, static inline bool skb_can_coalesce(struct sk_buff *skb, int i, const struct page *page, int off) { + if (skb_zcopy(skb)) + return false; if (i) { const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; @@ -3153,6 +3265,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); +int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, + int len); +int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, |