summaryrefslogtreecommitdiffstats
path: root/include/net/ip.h
diff options
context:
space:
mode:
author: Eric Dumazet, 2014-06-02 14:26:03 +0200
committer: David S. Miller, 2014-06-02 20:00:41 +0200
commit: 73f156a6e8c1074ac6327e0abd1169e95eb66463 (patch)
tree: 2c8b222f21784e738c397ba95dee70a8f256ea64 /include/net/ip.h
parent: of: of_mdio: export symbol of_mdiobus_link_phydev (diff)
download: kernel-qcow2-linux-73f156a6e8c1074ac6327e0abd1169e95eb66463.tar.gz
kernel-qcow2-linux-73f156a6e8c1074ac6327e0abd1169e95eb66463.tar.xz
kernel-qcow2-linux-73f156a6e8c1074ac6327e0abd1169e95eb66463.zip
inetpeer: get rid of ip_id_count
Ideally, we would need to generate IP IDs using a per-destination IP generator.

Linux kernels used the inet_peer cache for this purpose, but this had a huge cost on servers disabling MTU discovery:

1) Each inet_peer struct consumes 192 bytes.
2) The inetpeer cache uses a binary tree of inet_peer structs, with a nominal size of ~66000 elements under load.
3) Lookups in this tree hit a lot of cache lines, as the tree depth is about 20.
4) If a server deals with many TCP flows, we have a high probability of not finding the inet_peer, allocating a fresh one, and inserting it in the tree with the same initial ip_id_count (cf. secure_ip_id()).
5) We garbage collect inet_peer aggressively.

IP ID generation does not have to be 'perfect'.

The goal is to avoid duplicates in a short period of time, so that reassembly units have a chance to complete reassembly of fragments belonging to one message before receiving other fragments with a recycled ID.

We simply use an array of generators, and a Jenkins hash using the dst IP as a key.

ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it belongs (it is only used from this file).

secure_ip_id() and secure_ipv6_id() are no longer needed.

Rename ip_select_ident_more() to ip_select_ident_segs() to avoid unnecessary decrement/increment of the number of segments.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net/ip.h')
-rw-r--r-- include/net/ip.h 40
1 files changed, 23 insertions, 17 deletions
diff --git a/include/net/ip.h b/include/net/ip.h
index 2e4947895d75..0e795df05ec9 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -309,9 +309,19 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
}
}
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
+#define IP_IDENTS_SZ 2048u
+extern atomic_t *ip_idents;
-static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)
+static inline u32 ip_idents_reserve(u32 hash, int segs)
+{
+ atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ;
+
+ return atomic_add_return(segs, id_ptr) - segs;
+}
+
+void __ip_select_ident(struct iphdr *iph, int segs);
+
+static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs)
{
struct iphdr *iph = ip_hdr(skb);
@@ -321,24 +331,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s
* does not change, they drop every other packet in
* a TCP stream using header compression.
*/
- iph->id = (sk && inet_sk(sk)->inet_daddr) ?
- htons(inet_sk(sk)->inet_id++) : 0;
- } else
- __ip_select_ident(iph, dst, 0);
-}
-
-static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more)
-{
- struct iphdr *iph = ip_hdr(skb);
-
- if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
if (sk && inet_sk(sk)->inet_daddr) {
iph->id = htons(inet_sk(sk)->inet_id);
- inet_sk(sk)->inet_id += 1 + more;
- } else
+ inet_sk(sk)->inet_id += segs;
+ } else {
iph->id = 0;
- } else
- __ip_select_ident(iph, dst, more);
+ }
+ } else {
+ __ip_select_ident(iph, segs);
+ }
+}
+
+static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk)
+{
+ ip_select_ident_segs(skb, sk, 1);
}
static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)