summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller2019-01-27 22:29:43 +0100
committerDavid S. Miller2019-01-27 22:29:43 +0100
commit889865cf54bddd8a4e258a11225bcbb646f90863 (patch)
treef0afde54fb516f0293e849ade6c738ece48f16c1
parentnet: ipv4: ip_input: fix blank line coding style issues (diff)
parenttcp: change pingpong threshold to 3 (diff)
downloadkernel-qcow2-linux-889865cf54bddd8a4e258a11225bcbb646f90863.tar.gz
kernel-qcow2-linux-889865cf54bddd8a4e258a11225bcbb646f90863.tar.xz
kernel-qcow2-linux-889865cf54bddd8a4e258a11225bcbb646f90863.zip
Merge branch 'tcp-change-pingpong-to-3-in-delayed-ack-logic'
Wei Wang says: ==================== tcp: change pingpong to 3 in delayed ack logic TCP receiver today tries not to delay the ACKs to speed up the initial slow start (a.k.a. QUICK ACK mechanism). However, the previous design does not work well with modern TCP applications that start with an application-level handshake. For example, an HTTPS server often receives the SSL hello and responds right away, which triggers the TCP stack to stop the quick ack and start delaying the ACKs based on only one instance of ping-pong. This patchset changes the threshold from 1 to 3 ping-pong transactions, so that we only start to delay the acks after the receiver responds with data quickly three times. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/inet_connection_sock.h25
-rw-r--r--net/dccp/input.c2
-rw-r--r--net/dccp/timer.c4
-rw-r--r--net/ipv4/tcp.c10
-rw-r--r--net/ipv4/tcp_input.c8
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_output.c17
-rw-r--r--net/ipv4/tcp_timer.c4
-rw-r--r--net/ipv6/tcp_ipv6.c2
9 files changed, 51 insertions, 23 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index fe0d9b44d6fc..ff40e1d08157 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -314,4 +314,29 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
+
+#define TCP_PINGPONG_THRESH 3
+
+static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
+{
+ inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH;
+}
+
+static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
+{
+ inet_csk(sk)->icsk_ack.pingpong = 0;
+}
+
+static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
+{
+ return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (icsk->icsk_ack.pingpong < U8_MAX)
+ icsk->icsk_ack.pingpong++;
+}
#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 85d6c879383d..8d03707abdac 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -480,7 +480,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
}
- if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
+ if (sk->sk_write_pending || inet_csk_in_pingpong_mode(sk) ||
icsk->icsk_accept_queue.rskq_defer_accept) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 1501a20a94ca..74e138495d67 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -199,7 +199,7 @@ static void dccp_delack_timer(struct timer_list *t)
icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
if (inet_csk_ack_scheduled(sk)) {
- if (!icsk->icsk_ack.pingpong) {
+ if (!inet_csk_in_pingpong_mode(sk)) {
/* Delayed ACK missed: inflate ATO. */
icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
icsk->icsk_rto);
@@ -207,7 +207,7 @@ static void dccp_delack_timer(struct timer_list *t)
/* Delayed ACK missed: leave pingpong mode and
* deflate ATO.
*/
- icsk->icsk_ack.pingpong = 0;
+ inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
dccp_send_ack(sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 12ba21433dd0..6f8d292ad501 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1551,7 +1551,7 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
(copied > 0 &&
((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
- !icsk->icsk_ack.pingpong)) &&
+ !inet_csk_in_pingpong_mode(sk))) &&
!atomic_read(&sk->sk_rmem_alloc)))
time_to_ack = true;
}
@@ -2984,16 +2984,16 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_QUICKACK:
if (!val) {
- icsk->icsk_ack.pingpong = 1;
+ inet_csk_enter_pingpong_mode(sk);
} else {
- icsk->icsk_ack.pingpong = 0;
+ inet_csk_exit_pingpong_mode(sk);
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
inet_csk_ack_scheduled(sk)) {
icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
tcp_cleanup_rbuf(sk, 1);
if (!(val & 1))
- icsk->icsk_ack.pingpong = 1;
+ inet_csk_enter_pingpong_mode(sk);
}
}
break;
@@ -3407,7 +3407,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return 0;
}
case TCP_QUICKACK:
- val = !icsk->icsk_ack.pingpong;
+ val = !inet_csk_in_pingpong_mode(sk);
break;
case TCP_CONGESTION:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 76858b14ebe9..7a027dec649b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -221,7 +221,7 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_incr_quickack(sk, max_quickacks);
- icsk->icsk_ack.pingpong = 0;
+ inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
EXPORT_SYMBOL(tcp_enter_quickack_mode);
@@ -236,7 +236,7 @@ static bool tcp_in_quickack_mode(struct sock *sk)
const struct dst_entry *dst = __sk_dst_get(sk);
return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
- (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);
+ (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
}
static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
@@ -4094,7 +4094,7 @@ void tcp_fin(struct sock *sk)
case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT);
- inet_csk(sk)->icsk_ack.pingpong = 1;
+ inet_csk_enter_pingpong_mode(sk);
break;
case TCP_CLOSE_WAIT:
@@ -5889,7 +5889,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
return -1;
if (sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept ||
- icsk->icsk_ack.pingpong) {
+ inet_csk_in_pingpong_mode(sk)) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
*
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index efc6fef692ff..662b034f1795 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2437,7 +2437,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
refcount_read(&sk->sk_refcnt), sk,
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
- (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
tp->snd_cwnd,
state == TCP_LISTEN ?
fastopenq->max_qlen :
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 26a2948dca95..96bdb8eae9bb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -165,13 +165,16 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
- tp->lsndtime = now;
-
- /* If it is a reply for ato after last received
- * packet, enter pingpong mode.
+ /* If this is the first data packet sent in response to the
+ * previous received data,
+ * and it is a reply for ato after last received packet,
+ * increase pingpong count.
*/
- if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
- icsk->icsk_ack.pingpong = 1;
+ if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
+ (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+ inet_csk_inc_pingpong_cnt(sk);
+
+ tp->lsndtime = now;
}
/* Account for an ACK we sent. */
@@ -3569,7 +3572,7 @@ void tcp_send_delayed_ack(struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
int max_ato = HZ / 2;
- if (icsk->icsk_ack.pingpong ||
+ if (inet_csk_in_pingpong_mode(sk) ||
(icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
max_ato = TCP_DELACK_MAX;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d7399a89469d..f0c86398e6a7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -277,14 +277,14 @@ void tcp_delack_timer_handler(struct sock *sk)
icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
if (inet_csk_ack_scheduled(sk)) {
- if (!icsk->icsk_ack.pingpong) {
+ if (!inet_csk_in_pingpong_mode(sk)) {
/* Delayed ACK missed: inflate ATO. */
icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
} else {
/* Delayed ACK missed: leave pingpong mode and
* deflate ATO.
*/
- icsk->icsk_ack.pingpong = 0;
+ inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
tcp_mstamp_refresh(tcp_sk(sk));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b81eb7cb815e..e51cda79f0cc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1864,7 +1864,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
refcount_read(&sp->sk_refcnt), sp,
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
- (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
tp->snd_cwnd,
state == TCP_LISTEN ?
fastopenq->max_qlen :