diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 138 |
1 files changed, 100 insertions, 38 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0cfa7c0c1e80..4804645bdf02 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -247,6 +247,7 @@ #define pr_fmt(fmt) "TCP: " fmt +#include <crypto/hash.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> @@ -266,7 +267,6 @@ #include <linux/swap.h> #include <linux/cache.h> #include <linux/err.h> -#include <linux/crypto.h> #include <linux/time.h> #include <linux/slab.h> @@ -279,6 +279,7 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> +#include <asm/unaligned.h> #include <net/busy_poll.h> int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; @@ -422,7 +423,8 @@ void tcp_init_sock(struct sock *sk) sk->sk_rcvbuf = sysctl_tcp_rmem[1]; local_bh_disable(); - sock_update_memcg(sk); + if (mem_cgroup_sockets_enabled) + sock_update_memcg(sk); sk_sockets_allocated_inc(sk); local_bh_enable(); } @@ -451,11 +453,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) unsigned int mask; struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); + int state; sock_rps_record_flow(sk); sock_poll_wait(file, sk_sleep(sk), wait); - if (sk->sk_state == TCP_LISTEN) + + state = sk_state_load(sk); + if (state == TCP_LISTEN) return inet_csk_listen_poll(sk); /* Socket is not locked. We are protected from async events @@ -492,14 +497,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) * NOTE. Check for TCP_CLOSE is added. The goal is to prevent * blocking on fresh not-connected or disconnected socket. --ANK */ - if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) + if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) mask |= POLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLIN | POLLRDNORM | POLLRDHUP; /* Connected or passive Fast Open socket? */ - if (sk->sk_state != TCP_SYN_SENT && - (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk)) { + if (state != TCP_SYN_SENT && + (state != TCP_SYN_RECV || tp->fastopen_rsk)) { int target = sock_rcvlowat(sk, 0, INT_MAX); if (tp->urg_seq == tp->copied_seq && @@ -507,9 +512,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) tp->urg_data) target++; - /* Potential race condition. If read of tp below will - * escape above sk->sk_state, we can be illegally awaken - * in SYN_* states. */ if (tp->rcv_nxt - tp->copied_seq >= target) mask |= POLLIN | POLLRDNORM; @@ -517,8 +519,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk_stream_is_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { /* send SIGIO later */ - set_bit(SOCK_ASYNC_NOSPACE, - &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); /* Race breaker. If space is freed after @@ -906,7 +907,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, goto out_err; } - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); mss_now = tcp_send_mss(sk, &size_goal, flags); copied = 0; @@ -939,7 +940,7 @@ new_segment: i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, page, offset); - if (!can_coalesce && i >= MAX_SKB_FRAGS) { + if (!can_coalesce && i >= sysctl_max_skb_frags) { tcp_mark_push(tp, skb); goto new_segment; } @@ -1019,7 +1020,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, ssize_t res; if (!(sk->sk_route_caps & NETIF_F_SG) || - !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) + !sk_check_csum_caps(sk)) return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); @@ -1134,7 +1135,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) } /* This should be in poll */ - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); mss_now = tcp_send_mss(sk, &size_goal, flags); @@ -1176,7 +1177,7 @@ new_segment: /* * Check whether we can use HW checksum. */ - if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + if (sk_check_csum_caps(sk)) skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); @@ -1212,7 +1213,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { + if (i == sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } @@ -1934,7 +1935,7 @@ void tcp_set_state(struct sock *sk, int state) /* Change state AFTER socket is unhashed to avoid closed * socket sitting in hash tables. */ - sk->sk_state = state; + sk_state_store(sk, state); #ifdef STATE_TRACE SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); @@ -2638,13 +2639,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; unsigned int start; + u64 rate64; u32 rate; memset(info, 0, sizeof(*info)); if (sk->sk_type != SOCK_STREAM) return; - info->tcpi_state = sk->sk_state; + info->tcpi_state = sk_state_load(sk); + info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = icsk->icsk_probes_out; @@ -2672,7 +2675,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; - if (sk->sk_state == TCP_LISTEN) { + if (info->tcpi_state == TCP_LISTEN) { info->tcpi_unacked = sk->sk_ack_backlog; info->tcpi_sacked = sk->sk_max_ack_backlog; } else { @@ -2702,15 +2705,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_total_retrans = tp->total_retrans; rate = READ_ONCE(sk->sk_pacing_rate); - info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL; + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_pacing_rate); rate = READ_ONCE(sk->sk_max_pacing_rate); - info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_max_pacing_rate); do { start = u64_stats_fetch_begin_irq(&tp->syncp); - info->tcpi_bytes_acked = tp->bytes_acked; - info->tcpi_bytes_received = tp->bytes_received; + put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); + put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; @@ -2938,17 +2943,26 @@ static bool tcp_md5sig_pool_populated = false; static void __tcp_alloc_md5sig_pool(void) { + struct crypto_ahash *hash; int cpu; + hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(hash)) + return; + for_each_possible_cpu(cpu) { - if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) { - struct crypto_hash *hash; + struct ahash_request *req; - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR_OR_NULL(hash)) - return; - per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash; - } + if (per_cpu(tcp_md5sig_pool, cpu).md5_req) + continue; + + req = ahash_request_alloc(hash, GFP_KERNEL); + if (!req) + return; + + ahash_request_set_callback(req, 0, NULL, NULL); + + per_cpu(tcp_md5sig_pool, cpu).md5_req = req; } /* before setting tcp_md5sig_pool_populated, we must commit all writes * to memory. See smp_rmb() in tcp_get_md5sig_pool() @@ -2998,7 +3012,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, { struct scatterlist sg; struct tcphdr hdr; - int err; /* We are not allowed to change tcphdr, make a local copy */ memcpy(&hdr, th, sizeof(hdr)); @@ -3006,8 +3019,8 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, /* options aren't included in the hash */ sg_init_one(&sg, &hdr, sizeof(hdr)); - err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr)); - return err; + ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(hdr)); + return crypto_ahash_update(hp->md5_req); } EXPORT_SYMBOL(tcp_md5_hash_header); @@ -3016,7 +3029,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, { struct scatterlist sg; const struct tcphdr *tp = tcp_hdr(skb); - struct hash_desc *desc = &hp->md5_desc; + struct ahash_request *req = hp->md5_req; unsigned int i; const unsigned int head_data_len = skb_headlen(skb) > header_len ? skb_headlen(skb) - header_len : 0; @@ -3026,7 +3039,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, sg_init_table(&sg, 1); sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len); - if (crypto_hash_update(desc, &sg, head_data_len)) + ahash_request_set_crypt(req, &sg, NULL, head_data_len); + if (crypto_ahash_update(req)) return 1; for (i = 0; i < shi->nr_frags; ++i) { @@ -3036,7 +3050,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, sg_set_page(&sg, page, skb_frag_size(f), offset_in_page(offset)); - if (crypto_hash_update(desc, &sg, skb_frag_size(f))) + ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f)); + if (crypto_ahash_update(req)) return 1; } @@ -3053,7 +3068,8 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *ke struct scatterlist sg; sg_init_one(&sg, key->key, key->keylen); - return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen); + return crypto_ahash_update(hp->md5_req); } EXPORT_SYMBOL(tcp_md5_hash_key); @@ -3080,6 +3096,52 @@ void tcp_done(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_done); +int tcp_abort(struct sock *sk, int err) +{ + if (!sk_fullsock(sk)) { + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + + local_bh_disable(); + inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, + req); + local_bh_enable(); + return 0; + } + sock_gen_put(sk); + return -EOPNOTSUPP; + } + + /* Don't race with userspace socket closes such as tcp_close. */ + lock_sock(sk); + + if (sk->sk_state == TCP_LISTEN) { + tcp_set_state(sk, TCP_CLOSE); + inet_csk_listen_stop(sk); + } + + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ + local_bh_disable(); + bh_lock_sock(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_err = err; + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); + sk->sk_error_report(sk); + if (tcp_need_reset(sk->sk_state)) + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); + } + + bh_unlock_sock(sk); + local_bh_enable(); + release_sock(sk); + sock_put(sk); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_abort); + extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries; |