diff options
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 128 |
1 files changed, 76 insertions, 52 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1c2648bbac4b..4fdbf4e56797 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -73,7 +73,6 @@ #include <net/timewait_sock.h> #include <net/xfrm.h> #include <net/secure_seq.h> -#include <net/tcp_memcontrol.h> #include <net/busy_poll.h> #include <linux/inet.h> @@ -82,7 +81,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <linux/crypto.h> +#include <crypto/hash.h> #include <linux/scatterlist.h> int sysctl_tcp_tw_reuse __read_mostly; @@ -312,7 +311,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ -void tcp_req_err(struct sock *sk, u32 seq) +void tcp_req_err(struct sock *sk, u32 seq, bool abort) { struct request_sock *req = inet_reqsk(sk); struct net *net = sock_net(sk); @@ -324,7 +323,7 @@ void tcp_req_err(struct sock *sk, u32 seq) if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - } else { + } else if (abort) { /* * Still in SYN_RECV, just remove it silently. * There is no good way to pass the error to the newly @@ -384,7 +383,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } seq = ntohl(th->seq); if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq); + return tcp_req_err(sk, seq, + type == ICMP_PARAMETERPROB || + type == ICMP_TIME_EXCEEDED || + (type == ICMP_DEST_UNREACH && + (code == ICMP_NET_UNREACH || + code == ICMP_HOST_UNREACH))); bh_lock_sock(sk); /* If too many ICMPs get dropped on busy @@ -587,7 +591,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) } rep; struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; + struct tcp_md5sig_key *key = NULL; const __u8 *hash_location = NULL; unsigned char newhash[16]; int genhash; @@ -627,7 +631,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (!sk && hash_location) { + if (sk && sk_fullsock(sk)) { + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) + &ip_hdr(skb)->saddr, AF_INET); + } else if (hash_location) { /* * active side is lost. Try to find listening socket through * source port, and then find md5 key through listening socket. @@ -651,10 +658,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; - } else { - key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *) - &ip_hdr(skb)->saddr, - AF_INET) : NULL; } if (key) { @@ -675,7 +678,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; - arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; + arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0; + /* When socket is gone, all binding information is lost. * routing might fail in this case. No choice here, if we choose to force * input interface, we will misroute in case of asymmetric route. @@ -683,6 +687,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) if (sk) arg.bound_dev_if = sk->sk_bound_dev_if; + BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != + offsetof(struct inet_timewait_sock, tw_bound_dev_if)); + arg.tos = ip_hdr(skb)->tos; ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -705,7 +712,8 @@ release_sk1: outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, +static void tcp_v4_send_ack(struct net *net, + struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int reply_flags, u8 tos) @@ -720,7 +728,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ]; } rep; struct ip_reply_arg arg; - struct net *net = dev_net(skb_dst(skb)->dev); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -782,7 +789,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v4_send_ack(sock_net(sk), skb, + tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, @@ -801,8 +809,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ - tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? - tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : + tcp_sk(sk)->snd_nxt; + + tcp_v4_send_ack(sock_net(sk), skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, tcp_time_stamp, req->ts_recent, @@ -921,7 +931,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, } md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk)); + sock_owned_by_user(sk) || + lockdep_is_held(&sk->sk_lock.slock)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) @@ -1028,21 +1039,22 @@ static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, bp->len = cpu_to_be16(nbytes); sg_init_one(&sg, bp, sizeof(*bp)); - return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->md5_req); } static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th) { struct tcp_md5sig_pool *hp; - struct hash_desc *desc; + struct ahash_request *req; hp = tcp_get_md5sig_pool(); if (!hp) goto clear_hash_noput; - desc = &hp->md5_desc; + req = hp->md5_req; - if (crypto_hash_init(desc)) + if (crypto_ahash_init(req)) goto clear_hash; if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) goto clear_hash; @@ -1050,7 +1062,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; - if (crypto_hash_final(desc, md5_hash)) + ahash_request_set_crypt(req, NULL, md5_hash, 0); + if (crypto_ahash_final(req)) goto clear_hash; tcp_put_md5sig_pool(); @@ -1068,7 +1081,7 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sk_buff *skb) { struct tcp_md5sig_pool *hp; - struct hash_desc *desc; + struct ahash_request *req; const struct tcphdr *th = tcp_hdr(skb); __be32 saddr, daddr; @@ -1084,9 +1097,9 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, hp = tcp_get_md5sig_pool(); if (!hp) goto clear_hash_noput; - desc = &hp->md5_desc; + req = hp->md5_req; - if (crypto_hash_init(desc)) + if (crypto_ahash_init(req)) goto clear_hash; if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) @@ -1097,7 +1110,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; - if (crypto_hash_final(desc, md5_hash)) + ahash_request_set_crypt(req, NULL, md5_hash, 0); + if (crypto_ahash_final(req)) goto clear_hash; tcp_put_md5sig_pool(); @@ -1275,6 +1289,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, ireq = inet_rsk(req); sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); + newsk->sk_bound_dev_if = ireq->ir_iif; newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = ireq->opt; rcu_assign_pointer(newinet->inet_opt, inet_opt); @@ -1326,6 +1341,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + if (*own_req) + tcp_move_syn(newtp, req); return newsk; @@ -1490,7 +1507,7 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) if (likely(sk->sk_rx_dst)) skb_dst_drop(skb); else - skb_dst_force(skb); + skb_dst_force_safe(skb); __skb_queue_tail(&tp->ucopy.prequeue, skb); tp->ucopy.memory += skb->truesize; @@ -1583,28 +1600,30 @@ process: if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; - if (tcp_v4_inbound_md5_hash(sk, skb)) - goto discard_and_relse; - if (likely(sk->sk_state == TCP_LISTEN)) { - nsk = tcp_check_req(sk, skb, req, false); - } else { + if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { + reqsk_put(req); + goto discard_it; + } + if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } @@ -1702,7 +1721,9 @@ do_time_wait: tcp_v4_timewait_ack(sk, skb); break; case TCP_TW_RST: - goto no_tcp_socket; + tcp_v4_send_reset(sk, skb); + inet_twsk_deschedule_put(inet_twsk(sk)); + goto discard_it; case TCP_TW_SUCCESS:; } goto discard_it; @@ -1718,8 +1739,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - if (dst) { - dst_hold(dst); + if (dst && dst_hold_safe(dst)) { sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } @@ -1810,7 +1830,9 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_saved_syn_free(tp); sk_sockets_allocated_dec(sk); - sock_release_memcg(sk); + + if (mem_cgroup_sockets_enabled && sk->sk_memcg) + sock_release_memcg(sk); } EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -2156,6 +2178,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) __u16 destp = ntohs(inet->inet_dport); __u16 srcp = ntohs(inet->inet_sport); int rx_queue; + int state; if (icsk->icsk_pending == ICSK_TIME_RETRANS || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || @@ -2173,17 +2196,18 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) timer_expires = jiffies; } - if (sk->sk_state == TCP_LISTEN) + state = sk_state_load(sk); + if (state == TCP_LISTEN) rx_queue = sk->sk_ack_backlog; else - /* - * because we dont lock socket, we might find a transient negative value + /* Because we don't lock the socket, + * we might find a transient negative value. */ rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", - i, src, srcp, dest, destp, sk->sk_state, + i, src, srcp, dest, destp, state, tp->write_seq - tp->snd_una, rx_queue, timer_active, @@ -2197,8 +2221,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, - sk->sk_state == TCP_LISTEN ? - (fastopenq ? fastopenq->max_qlen : 0) : + state == TCP_LISTEN ? + fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); } @@ -2332,11 +2356,7 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_MEMCG_KMEM - .init_cgroup = tcp_init_cgroup, - .destroy_cgroup = tcp_destroy_cgroup, - .proto_cgroup = tcp_proto_cgroup, -#endif + .diag_destroy = tcp_abort, }; EXPORT_SYMBOL(tcp_prot); @@ -2374,6 +2394,10 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; + net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; + net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; + net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; + return 0; fail: tcp_sk_exit(net); |