From dd82088dab3646ed28e4aa43d1a5b5d5ffc2afba Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 21 Feb 2013 11:12:40 +0100 Subject: netfilter: ipset: "Directory not empty" error message When an entry flagged with "nomatch" was tested by ipset, it returned the error message "Kernel error received: Directory not empty" instead of "<element> is NOT in set <setname>" (reported by John Brendler). The internal error code was not properly transformed before returning to userspace; fixed. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 6d6d8f2b033e..38ca630eeeb8 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1470,7 +1470,8 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, if (ret == -EAGAIN) ret = 1; - return ret < 0 ? ret : ret > 0 ? 0 : -IPSET_ERR_EXIST; + return (ret < 0 && ret != -ENOTEMPTY) ? ret : + ret > 0 ? 0 : -IPSET_ERR_EXIST; } /* Get headed data of a set */ -- cgit v1.2.3-55-g7522 From 90c7881ecee1f08e0a49172cf61371cf2509ee4a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 26 Feb 2013 19:15:02 +0000 Subject: irda: small read beyond end of array in debug code charset comes from skb->data. It's a number in the 0-255 range. If we have debugging turned on then this could cause a read beyond the end of the array. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/irda/iriap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/iriap.c b/net/irda/iriap.c index e71e85ba2bf1..29340a9a6fb9 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -495,8 +495,11 @@ static void iriap_getvaluebyclass_confirm(struct iriap_cb *self, /* case CS_ISO_8859_9: */ /* case CS_UNICODE: */ default: - IRDA_DEBUG(0, "%s(), charset %s, not supported\n", - __func__, ias_charset_types[charset]); + IRDA_DEBUG(0, "%s(), charset [%d] %s, not supported\n", + __func__, charset, + charset < ARRAY_SIZE(ias_charset_types) ? + ias_charset_types[charset] : + "(unknown)"); /* Aborting, close connection! */ iriap_disconnect_request(self); -- cgit v1.2.3-55-g7522 From 726bc6b092da4c093eb74d13c07184b18c1af0f1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 27 Feb 2013 10:57:31 +0000 Subject: net/sctp: Validate parameter size for SCTP_GET_ASSOC_STATS Building sctp may fail with: In function ‘copy_from_user’, inlined from ‘sctp_getsockopt_assoc_stats’ at net/sctp/socket.c:5656:20: arch/x86/include/asm/uaccess_32.h:211:26: error: call to ‘copy_from_user_overflow’ declared with attribute error: copy_from_user() buffer size is not provably correct if built with W=1 due to a missing parameter size validation before the call to copy_from_user. Signed-off-by: Guenter Roeck Acked-by: Vlad Yasevich Signed-off-by: David S.
Miller --- net/sctp/socket.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index cedd9bf67b8c..9ef5c7312e12 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5653,6 +5653,9 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, if (len < sizeof(sctp_assoc_t)) return -EINVAL; + /* Allow the struct to grow and fill in as much as possible */ + len = min_t(size_t, len, sizeof(sas)); + if (copy_from_user(&sas, optval, len)) return -EFAULT; @@ -5686,9 +5689,6 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, /* Mark beginning of a new observation period */ asoc->stats.max_obs_rto = asoc->rto_min; - /* Allow the struct to grow and fill in as much as possible */ - len = min_t(size_t, len, sizeof(sas)); - if (put_user(len, optlen)) return -EFAULT; -- cgit v1.2.3-55-g7522 From 70fc69bc5a54d9776ace7c99d46eb533f8fb6e89 Mon Sep 17 00:00:00 2001 From: Lee A. Roberts Date: Thu, 28 Feb 2013 04:37:27 +0000 Subject: sctp: fix association hangs due to off-by-one errors in sctp_tsnmap_grow() In sctp_tsnmap_mark(), correct off-by-one error when calculating size value for sctp_tsnmap_grow(). In sctp_tsnmap_grow(), correct off-by-one error when copying and resizing the tsnmap. If max_tsn_seen is in the LSB of the word, this bit can be lost, causing the corresponding packet to be transmitted again and to be entered as a duplicate into the SCTP reassembly/ordering queues. Change parameter name from "gap" (zero-based index) to "size" (one-based) to enhance code readability. Signed-off-by: Lee A. Roberts Acked-by: Vlad Yasevich Acked-by: Neil Horman --- net/sctp/tsnmap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 5f25e0c92c31..396c45174e5b 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -51,7 +51,7 @@ static void sctp_tsnmap_update(struct sctp_tsnmap *map); static void sctp_tsnmap_find_gap_ack(unsigned long *map, __u16 off, __u16 len, __u16 *start, __u16 *end); -static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap); +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 size); /* Initialize a block of memory as a tsnmap. */ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len, @@ -124,7 +124,7 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn, gap = tsn - map->base_tsn; - if (gap >= map->len && !sctp_tsnmap_grow(map, gap)) + if (gap >= map->len && !sctp_tsnmap_grow(map, gap + 1)) return -ENOMEM; if (!sctp_tsnmap_has_gap(map) && gap == 0) { @@ -360,23 +360,24 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, return ngaps; } -static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap) +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 size) { unsigned long *new; unsigned long inc; u16 len; - if (gap >= SCTP_TSN_MAP_SIZE) + if (size > SCTP_TSN_MAP_SIZE) return 0; - inc = ALIGN((gap - map->len),BITS_PER_LONG) + SCTP_TSN_MAP_INCREMENT; + inc = ALIGN((size - map->len), BITS_PER_LONG) + SCTP_TSN_MAP_INCREMENT; len = min_t(u16, map->len + inc, SCTP_TSN_MAP_SIZE); new = kzalloc(len>>3, GFP_ATOMIC); if (!new) return 0; - bitmap_copy(new, map->tsn_map, map->max_tsn_seen - map->base_tsn); + bitmap_copy(new, map->tsn_map, + map->max_tsn_seen - map->cumulative_tsn_ack_point); kfree(map->tsn_map); map->tsn_map = new; map->len = len; -- cgit v1.2.3-55-g7522 From e67f85ecd83de66d4f25f2e0f90bb0d01a52ddd8 Mon Sep 17 00:00:00 2001 From: Lee A. 
Roberts Date: Thu, 28 Feb 2013 04:37:28 +0000 Subject: sctp: fix association hangs due to reneging packets below the cumulative TSN ACK point In sctp_ulpq_renege_list(), do not renege packets below the cumulative TSN ACK point. Signed-off-by: Lee A. Roberts Acked-by: Vlad Yasevich Acked-by: Neil Horman --- net/sctp/ulpqueue.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index ada17464b65b..63afddcbcd2c 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -969,11 +969,16 @@ static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, tsnmap = &ulpq->asoc->peer.tsn_map; - while ((skb = __skb_dequeue_tail(list)) != NULL) { - freed += skb_headlen(skb); + while ((skb = skb_peek_tail(list)) != NULL) { event = sctp_skb2event(skb); tsn = event->tsn; + /* Don't renege below the Cumulative TSN ACK Point. */ + if (TSN_lte(tsn, sctp_tsnmap_get_ctsn(tsnmap))) + break; + + __skb_unlink(skb, list); + freed += skb_headlen(skb); sctp_ulpevent_free(event); sctp_tsnmap_renege(tsnmap, tsn); if (freed >= needed) -- cgit v1.2.3-55-g7522 From 95ac7b859f508b1b3e6adf7dce307864e4384a69 Mon Sep 17 00:00:00 2001 From: Lee A. Roberts Date: Thu, 28 Feb 2013 04:37:29 +0000 Subject: sctp: fix association hangs due to errors when reneging events from the ordering queue In sctp_ulpq_renege_list(), events being reneged from the ordering queue may correspond to multiple TSNs. Identify all affected packets; sum freed space and renege from the tsnmap. Signed-off-by: Lee A. Roberts Acked-by: Vlad Yasevich Acked-by: Neil Horman --- net/sctp/ulpqueue.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 63afddcbcd2c..f221fbbc80ac 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -962,8 +962,8 @@ static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, struct sk_buff_head *list, __u16 needed) { __u16 freed = 0; - __u32 tsn; - struct sk_buff *skb; + __u32 tsn, last_tsn; + struct sk_buff *skb, *flist, *last; struct sctp_ulpevent *event; struct sctp_tsnmap *tsnmap; @@ -977,10 +977,28 @@ static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, if (TSN_lte(tsn, sctp_tsnmap_get_ctsn(tsnmap))) break; - __skb_unlink(skb, list); + /* Events in ordering queue may have multiple fragments + * corresponding to additional TSNs. Sum the total + * freed space; find the last TSN. + */ freed += skb_headlen(skb); + flist = skb_shinfo(skb)->frag_list; + for (last = flist; flist; flist = flist->next) { + last = flist; + freed += skb_headlen(last); + } + if (last) + last_tsn = sctp_skb2event(last)->tsn; + else + last_tsn = tsn; + + /* Unlink the event, then renege all applicable TSNs. */ + __skb_unlink(skb, list); sctp_ulpevent_free(event); - sctp_tsnmap_renege(tsnmap, tsn); + while (TSN_lte(tsn, last_tsn)) { + sctp_tsnmap_renege(tsnmap, tsn); + tsn++; + } if (freed >= needed) return freed; } -- cgit v1.2.3-55-g7522 From d003b41b801124b96337973b01eada6a83673d23 Mon Sep 17 00:00:00 2001 From: Lee A. Roberts Date: Thu, 28 Feb 2013 04:37:30 +0000 Subject: sctp: fix association hangs due to partial delivery errors In sctp_ulpq_tail_data(), use return values 0,1 to indicate whether a complete event (with MSG_EOR set) was delivered. A return value of -ENOMEM continues to indicate an out-of-memory condition was encountered. 
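The TSN checks in these reneging fixes compare sequence numbers with TSN_lte() against the cumulative TSN ACK point; these are wraparound-safe serial-number comparisons on 32-bit TSNs. A minimal userspace sketch of the underlying idea (illustrative only, not the kernel's exact macro; the helper name tsn_lte is ours):

    #include <assert.h>
    #include <stdint.h>

    /* Wraparound-safe "a <= b" for 32-bit TSNs, in the spirit of TSN_lte(). */
    static int tsn_lte(uint32_t a, uint32_t b)
    {
            return a == b || (int32_t)(a - b) < 0;
    }

    int main(void)
    {
            assert(tsn_lte(5, 10));
            assert(tsn_lte(0xfffffffeu, 1));    /* still "before" across the 2^32 wrap */
            assert(!tsn_lte(1, 0xfffffffeu));
            return 0;
    }
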
In sctp_ulpq_retrieve_partial() and sctp_ulpq_retrieve_first(), correct message reassembly logic for SCTP partial delivery. Change logic to ensure that as much data as possible is sent with the initial partial delivery and that following partial deliveries contain all available data. In sctp_ulpq_partial_delivery(), attempt partial delivery only if the data on the head of the reassembly queue is at or before the cumulative TSN ACK point. In sctp_ulpq_renege(), use the modified return values from sctp_ulpq_tail_data() to choose whether to attempt partial delivery or to attempt to drain the reassembly queue as a means to reduce memory pressure. Remove call to sctp_tsnmap_mark(), as this is handled correctly in call to sctp_ulpq_tail_data(). Signed-off-by: Lee A. Roberts Acked-by: Vlad Yasevich Acked-by: Neil Horman --- net/sctp/ulpqueue.c | 54 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index f221fbbc80ac..0fd5b3d2df03 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -106,6 +106,7 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, { struct sk_buff_head temp; struct sctp_ulpevent *event; + int event_eor = 0; /* Create an event from the incoming chunk. */ event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp); @@ -127,10 +128,12 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, /* Send event to the ULP. 'event' is the sctp_ulpevent for * very first SKB on the 'temp' list. */ - if (event) + if (event) { + event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0; sctp_ulpq_tail_event(ulpq, event); + } - return 0; + return event_eor; } /* Add a new event for propagation to the ULP. */ @@ -540,14 +543,19 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { + case SCTP_DATA_FIRST_FRAG: + if (!first_frag) + return NULL; + goto done; case SCTP_DATA_MIDDLE_FRAG: if (!first_frag) { first_frag = pos; next_tsn = ctsn + 1; last_frag = pos; - } else if (next_tsn == ctsn) + } else if (next_tsn == ctsn) { next_tsn++; - else + last_frag = pos; + } else goto done; break; case SCTP_DATA_LAST_FRAG: @@ -651,6 +659,14 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) } else goto done; break; + + case SCTP_DATA_LAST_FRAG: + if (!first_frag) + return NULL; + else + goto done; + break; + default: return NULL; } @@ -1025,16 +1041,28 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event; struct sctp_association *asoc; struct sctp_sock *sp; + __u32 ctsn; + struct sk_buff *skb; asoc = ulpq->asoc; sp = sctp_sk(asoc->base.sk); /* If the association is already in Partial Delivery mode - * we have noting to do. + * we have nothing to do. */ if (ulpq->pd_mode) return; + /* Data must be at or below the Cumulative TSN ACK Point to + * start partial delivery. + */ + skb = skb_peek(&asoc->ulpq.reasm); + if (skb != NULL) { + ctsn = sctp_skb2event(skb)->tsn; + if (!TSN_lte(ctsn, sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map))) + return; + } + /* If the user enabled fragment interleave socket option, * multiple associations can enter partial delivery. * Otherwise, we can only enter partial delivery if the @@ -1077,12 +1105,16 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, } /* If able to free enough room, accept this chunk. 
*/ if (chunk && (freed >= needed)) { - __u32 tsn; - tsn = ntohl(chunk->subh.data_hdr->tsn); - sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport); - sctp_ulpq_tail_data(ulpq, chunk, gfp); - - sctp_ulpq_partial_delivery(ulpq, gfp); + int retval; + retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); + /* + * Enter partial delivery if chunk has not been + * delivered; otherwise, drain the reassembly queue. + */ + if (retval <= 0) + sctp_ulpq_partial_delivery(ulpq, gfp); + else if (retval == 1) + sctp_ulpq_reasm_drain(ulpq); } sk_mem_reclaim(asoc->base.sk); -- cgit v1.2.3-55-g7522 From 8b82547e33e85fc24d4d172a93c796de1fefa81a Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 1 Mar 2013 05:02:02 +0000 Subject: l2tp: Restore socket refcount when sendmsg succeeds The sendmsg() syscall handler for PPPoL2TP doesn't decrease the socket reference counter after successful transmissions. Any successful sendmsg() call from userspace will then increase the reference counter forever, thus preventing the kernel's session and tunnel data from being freed later on. The problem only happens when writing directly on L2TP sockets. PPP sockets attached to L2TP are unaffected as the PPP subsystem uses pppol2tp_xmit() which symmetrically increases/decreases reference counters. This patch adds the missing call to sock_put() before returning from pppol2tp_sendmsg(). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 3f4e3afc191a..6a53371dba1f 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -355,6 +355,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh l2tp_xmit_skb(session, skb, session->hdr_len); sock_put(ps->tunnel_sock); + sock_put(sk); return error; -- cgit v1.2.3-55-g7522 From d8c6f4b9b7848bca8babfc0ae43a50c8ab22fbb9 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 1 Mar 2013 07:44:08 +0000 Subject: ipv[4|6]: correct dropwatch false positive in local_deliver_finish I had a report recently of a user trying to use dropwatch to localise some frame loss, and they were getting false positives. Turned out they were using a user space SCTP stack that used raw sockets to grab frames. When we don't have a registered protocol for a given packet, we record it as a drop, even if a raw socket receives the frame. We should only record the drop in the event a raw socket doesn't exist to receive the frames. Tested by the reporter successfully. Signed-off-by: Neil Horman Reported-by: William Reich Tested-by: William Reich CC: "David S. Miller" CC: William Reich CC: eric.dumazet@gmail.com Acked-by: Eric Dumazet Signed-off-by: David S.
Miller --- net/ipv4/ip_input.c | 6 ++++-- net/ipv6/ip6_input.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 87abd3e2bd32..2bdf802e28e2 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -228,9 +228,11 @@ static int ip_local_deliver_finish(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } - } else + kfree_skb(skb); + } else { IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); - kfree_skb(skb); + consume_skb(skb); + } } } out: diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5b10414e619e..b1876e52091e 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -241,9 +241,11 @@ resubmit: icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff); } - } else + kfree_skb(skb); + } else { IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); - kfree_skb(skb); + consume_skb(skb); + } } rcu_read_unlock(); return 0; -- cgit v1.2.3-55-g7522 From 81ce0dbc119fa31af21d02febde1cf923022d4d6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Feb 2013 19:27:43 +0000 Subject: sctp: use the passed in gfp flags instead of GFP_KERNEL This patch doesn't change how the code works because in the current kernel gfp is always GFP_KERNEL. But gfp was obviously intended instead of GFP_KERNEL. Signed-off-by: Dan Carpenter Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/endpointola.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 73aad3d16a45..9a680c075141 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -155,7 +155,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* SCTP-AUTH extensions*/ INIT_LIST_HEAD(&ep->endpoint_shared_keys); - null_key = sctp_auth_shkey_create(0, GFP_KERNEL); + null_key = sctp_auth_shkey_create(0, gfp); if (!null_key) goto nomem; -- cgit v1.2.3-55-g7522 From d6e89c0b7660bd725a0948126f9db1a327f48d76 Mon Sep 17 00:00:00 2001 From: Silviu-Mihai Popescu Date: Sat, 2 Mar 2013 09:45:19 +0000 Subject: caif_dev: fix sparse warnings for caif_flow_cb This fixed the following sparse warning: net/caif/caif_dev.c:121:6: warning: symbol 'caif_flow_cb' was not declared. Should it be static? Signed-off-by: Silviu-Mihai Popescu Signed-off-by: David S. Miller --- net/caif/caif_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 1ae1d9cb278d..21760f008974 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -118,7 +118,7 @@ static struct caif_device_entry *caif_get(struct net_device *dev) return NULL; } -void caif_flow_cb(struct sk_buff *skb) +static void caif_flow_cb(struct sk_buff *skb) { struct caif_device_entry *caifd; void (*dtor)(struct sk_buff *skb) = NULL; -- cgit v1.2.3-55-g7522 From ece6b0a2b25652d684a7ced4ae680a863af041e0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 3 Mar 2013 16:18:11 +0000 Subject: rds: limit the size allocated by rds_message_alloc() Dave Jones reported the following bug: "When fed mangled socket data, rds will trust what userspace gives it, and tries to allocate enormous amounts of memory larger than what kmalloc can satisfy."
WARNING: at mm/page_alloc.c:2393 __alloc_pages_nodemask+0xa0d/0xbe0() Hardware name: GA-MA78GM-S2H Modules linked in: vmw_vsock_vmci_transport vmw_vmci vsock fuse bnep dlci bridge 8021q garp stp mrp binfmt_misc l2tp_ppp l2tp_core rfcomm s Pid: 24652, comm: trinity-child2 Not tainted 3.8.0+ #65 Call Trace: [] warn_slowpath_common+0x75/0xa0 [] warn_slowpath_null+0x1a/0x20 [] __alloc_pages_nodemask+0xa0d/0xbe0 [] ? native_sched_clock+0x26/0x90 [] ? trace_hardirqs_off_caller+0x28/0xc0 [] ? trace_hardirqs_off+0xd/0x10 [] alloc_pages_current+0xb8/0x180 [] __get_free_pages+0x2a/0x80 [] kmalloc_order_trace+0x3e/0x1a0 [] __kmalloc+0x2f5/0x3a0 [] ? local_bh_enable_ip+0x7c/0xf0 [] rds_message_alloc+0x23/0xb0 [rds] [] rds_sendmsg+0x2b1/0x990 [rds] [] ? trace_hardirqs_off+0xd/0x10 [] sock_sendmsg+0xb0/0xe0 [] ? get_lock_stats+0x22/0x70 [] ? put_lock_stats.isra.23+0xe/0x40 [] sys_sendto+0x130/0x180 [] ? trace_hardirqs_on+0xd/0x10 [] ? _raw_spin_unlock_irq+0x3b/0x60 [] ? sysret_check+0x1b/0x56 [] ? trace_hardirqs_on_caller+0x115/0x1a0 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b ---[ end trace eed6ae990d018c8b ]--- Reported-by: Dave Jones Cc: Dave Jones Cc: David S. Miller Cc: Venkat Venkatsubra Signed-off-by: Cong Wang Acked-by: Venkat Venkatsubra Signed-off-by: David S. Miller --- net/rds/message.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/rds/message.c b/net/rds/message.c index f0a4658f3273..aff589cc022e 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -197,6 +197,9 @@ struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp) { struct rds_message *rm; + if (extra_len > KMALLOC_MAX_SIZE - sizeof(struct rds_message)) + return NULL; + rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp); if (!rm) goto out; -- cgit v1.2.3-55-g7522 From 3f736868b47687d1336fe88185560b22bb92021e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 3 Mar 2013 16:28:27 +0000 Subject: sctp: use KMALLOC_MAX_SIZE instead of its own MAX_KMALLOC_SIZE Don't define its own MAX_KMALLOC_SIZE; use the one defined in mm. Cc: Vlad Yasevich Cc: Sridhar Samudrala Cc: Neil Horman Cc: David S. Miller Signed-off-by: Cong Wang Acked-by: Neil Horman Signed-off-by: David S.
Miller --- net/sctp/ssnmap.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 442ad4ed6315..825ea94415b3 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -41,8 +41,6 @@ #include #include -#define MAX_KMALLOC_SIZE 131072 - static struct sctp_ssnmap *sctp_ssnmap_init(struct sctp_ssnmap *map, __u16 in, __u16 out); @@ -65,7 +63,7 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, int size; size = sctp_ssnmap_size(in, out); - if (size <= MAX_KMALLOC_SIZE) + if (size <= KMALLOC_MAX_SIZE) retval = kmalloc(size, gfp); else retval = (struct sctp_ssnmap *) @@ -82,7 +80,7 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, return retval; fail_map: - if (size <= MAX_KMALLOC_SIZE) + if (size <= KMALLOC_MAX_SIZE) kfree(retval); else free_pages((unsigned long)retval, get_order(size)); @@ -124,7 +122,7 @@ void sctp_ssnmap_free(struct sctp_ssnmap *map) int size; size = sctp_ssnmap_size(map->in.len, map->out.len); - if (size <= MAX_KMALLOC_SIZE) + if (size <= KMALLOC_MAX_SIZE) kfree(map); else free_pages((unsigned long)map, get_order(size)); -- cgit v1.2.3-55-g7522 From 3e8b0ac3e41e3c882222a5522d5df7212438ab51 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Sun, 3 Mar 2013 20:46:46 +0000 Subject: net: ipv6: Don't purge default router if accept_ra=2 Setting net.ipv6.conf.<interface>.accept_ra=2 causes the kernel to accept RAs even when forwarding is enabled. However, enabling forwarding purges all default routes on the system, breaking connectivity until the next RA is received. Fix this by not purging default routes on interfaces that have accept_ra=2. Signed-off-by: Lorenzo Colitti Acked-by: YOSHIFUJI Hideaki Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 928266569689..e5fe0041adfa 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1915,7 +1915,8 @@ void rt6_purge_dflt_routers(struct net *net) restart: read_lock_bh(&table->tb6_lock); for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { + if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); ip6_del_rt(rt); -- cgit v1.2.3-55-g7522 From 7dac1b514a00817ddb43704068c14ffd8b8fba19 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 3 Mar 2013 20:57:18 +0000 Subject: rds: simplify a warning message Cc: David S. Miller Cc: Venkat Venkatsubra Signed-off-by: Cong Wang Signed-off-by: David S.
Miller --- net/rds/message.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/rds/message.c b/net/rds/message.c index aff589cc022e..aba232f9f308 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -82,10 +82,7 @@ static void rds_message_purge(struct rds_message *rm) void rds_message_put(struct rds_message *rm) { rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); - if (atomic_read(&rm->m_refcount) == 0) { -printk(KERN_CRIT "danger refcount zero on %p\n", rm); -WARN_ON(1); - } + WARN(!atomic_read(&rm->m_refcount), "danger refcount zero on %p\n", rm); if (atomic_dec_and_test(&rm->m_refcount)) { BUG_ON(!list_empty(&rm->m_sock_item)); BUG_ON(!list_empty(&rm->m_conn_item)); -- cgit v1.2.3-55-g7522 From d2123be0e55101742dacd6dae60cdd0f1e2ef302 Mon Sep 17 00:00:00 2001 From: Silviu-Mihai Popescu Date: Sun, 3 Mar 2013 21:09:31 +0000 Subject: CAIF: fix sparse warning for caif_usb This fixes the following sparse warning: net/caif/caif_usb.c:84:16: warning: symbol 'cfusbl_create' was not declared. Should it be static? Signed-off-by: Silviu-Mihai Popescu Signed-off-by: David S. Miller --- net/caif/caif_usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 3ebc8cbc91ff..ef8ebaa993cf 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -81,8 +81,8 @@ static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, layr->up->ctrlcmd(layr->up, ctrl, layr->id); } -struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], - u8 braddr[ETH_ALEN]) +static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], + u8 braddr[ETH_ALEN]) { struct cfusbl *this = kmalloc(sizeof(struct cfusbl), GFP_ATOMIC); -- cgit v1.2.3-55-g7522 From aab2b4bf224ef8358d262f95b568b8ad0cecf0a0 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 4 Mar 2013 06:23:05 +0000 Subject: tcp: fix double-counted receiver RTT when leaving receiver fast path We should not update ts_recent and call tcp_rcv_rtt_measure_ts() both before and after going to step5. That wastes CPU and double-counts the receiver-side RTT sample. Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a759e19496d2..0d9bdacce99f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5485,6 +5485,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, if (tcp_checksum_complete_user(sk, skb)) goto csum_error; + if ((int)skb->truesize > sk->sk_forward_alloc) + goto step5; + /* Predicted packet is in window by definition. * seq == rcv_nxt and rcv_wup <= rcv_nxt. * Hence, check seq<=rcv_wup reduces to: @@ -5496,9 +5499,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_rcv_rtt_measure_ts(sk, skb); - if ((int)skb->truesize > sk->sk_forward_alloc) - goto step5; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ -- cgit v1.2.3-55-g7522 From a4ed2e737cb73e4405a3649f8aef7619b99fecae Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Tue, 5 Mar 2013 06:09:04 +0000 Subject: net/irda: Fix port open counts Saving the port count bump is unsafe. If the tty is hung up while this open was blocking, the port count is zeroed. Explicitly check if the tty was hung up while blocking, and correct the port count if not. 
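Condensed, the open-count handling that this fix (together with the locking fix that follows) converges on looks like this; a sketch of the logic in ircomm_tty_block_til_ready(), not the complete function:

    spin_lock_irqsave(&port->lock, flags);
    if (!tty_hung_up_p(filp))
            port->count--;                  /* drop our count while we block */
    port->blocked_open++;
    spin_unlock_irqrestore(&port->lock, flags);

    /* ... sleep until carrier is raised, a hangup, or a signal ... */

    spin_lock_irqsave(&port->lock, flags);
    if (!tty_hung_up_p(filp))               /* a hangup already zeroed port->count, */
            port->count++;                  /* so restore our count only if still open */
    port->blocked_open--;
    spin_unlock_irqrestore(&port->lock, flags);
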
Signed-off-by: Peter Hurley Signed-off-by: David S. Miller --- net/irda/ircomm/ircomm_tty.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 9a5fd3c3e530..1721dc7e4315 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -280,7 +280,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, struct tty_port *port = &self->port; DECLARE_WAITQUEUE(wait, current); int retval; - int do_clocal = 0, extra_count = 0; + int do_clocal = 0; unsigned long flags; IRDA_DEBUG(2, "%s()\n", __func__ ); @@ -315,10 +315,8 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, __FILE__, __LINE__, tty->driver->name, port->count); spin_lock_irqsave(&port->lock, flags); - if (!tty_hung_up_p(filp)) { - extra_count = 1; + if (!tty_hung_up_p(filp)) port->count--; - } spin_unlock_irqrestore(&port->lock, flags); port->blocked_open++; @@ -361,12 +359,10 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, __set_current_state(TASK_RUNNING); remove_wait_queue(&port->open_wait, &wait); - if (extra_count) { - /* ++ is not atomic, so this should be protected - Jean II */ - spin_lock_irqsave(&port->lock, flags); + spin_lock_irqsave(&port->lock, flags); + if (!tty_hung_up_p(filp)) port->count++; - spin_unlock_irqrestore(&port->lock, flags); - } + spin_unlock_irqrestore(&port->lock, flags); port->blocked_open--; IRDA_DEBUG(1, "%s(%d):block_til_ready after blocking on %s open_count=%d\n", -- cgit v1.2.3-55-g7522 From 2f7c069b96ed7b1f6236f2fa7b0bc06f4f54f2d9 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Tue, 5 Mar 2013 06:09:05 +0000 Subject: net/irda: Hold port lock while bumping blocked_open Although tty_lock() already protects concurrent update to blocked_open, that fails to meet the separation-of-concerns between tty_port and tty. Signed-off-by: Peter Hurley Signed-off-by: David S. Miller --- net/irda/ircomm/ircomm_tty.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 1721dc7e4315..d282bbea710e 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -317,8 +317,8 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, spin_lock_irqsave(&port->lock, flags); if (!tty_hung_up_p(filp)) port->count--; - spin_unlock_irqrestore(&port->lock, flags); port->blocked_open++; + spin_unlock_irqrestore(&port->lock, flags); while (1) { if (tty->termios.c_cflag & CBAUD) @@ -362,8 +362,8 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, spin_lock_irqsave(&port->lock, flags); if (!tty_hung_up_p(filp)) port->count++; - spin_unlock_irqrestore(&port->lock, flags); port->blocked_open--; + spin_unlock_irqrestore(&port->lock, flags); IRDA_DEBUG(1, "%s(%d):block_til_ready after blocking on %s open_count=%d\n", __FILE__, __LINE__, tty->driver->name, port->count); -- cgit v1.2.3-55-g7522 From 0b176ce3a7cbfa92eceddf3896f1a504877d974a Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Tue, 5 Mar 2013 06:09:06 +0000 Subject: net/irda: Use barrier to set task state Without a memory and compiler barrier, the task state change can migrate relative to the condition testing in a blocking loop. However, the task state change must be visible across all cpus prior to testing those conditions. Failing to do this can result in the familiar 'lost wakeup' and this task will hang until killed. 
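The canonical shape of a kernel wait loop, which the patch below moves toward by replacing the bare assignment with set_current_state(), is roughly the following ("condition" stands in for the loop's actual wake criteria):

    for (;;) {
            set_current_state(TASK_INTERRUPTIBLE);  /* state change plus barrier */
            if (condition || signal_pending(current))
                    break;
            schedule();
    }
    __set_current_state(TASK_RUNNING);

A bare "current->state = TASK_INTERRUPTIBLE;" carries no barrier, so the condition test may effectively be performed before the state change is visible; a wake_up() racing into that window is then lost and the task sleeps indefinitely.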
Signed-off-by: Peter Hurley Signed-off-by: David S. Miller --- net/irda/ircomm/ircomm_tty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index d282bbea710e..522543d9264a 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -324,7 +324,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, if (tty->termios.c_cflag & CBAUD) tty_port_raise_dtr_rts(port); - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); if (tty_hung_up_p(filp) || !test_bit(ASYNCB_INITIALIZED, &port->flags)) { -- cgit v1.2.3-55-g7522 From f74861ca87264e594e0134e8ac14db06be77fe6f Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Tue, 5 Mar 2013 06:09:07 +0000 Subject: net/irda: Raise dtr in non-blocking open DTR/RTS need to be raised, regardless of the open() mode, but not if the port has already shut down. Signed-off-by: Peter Hurley Signed-off-by: David S. Miller --- net/irda/ircomm/ircomm_tty.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 522543d9264a..362ba47968e4 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -289,8 +289,15 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, * If non-blocking mode is set, or the port is not enabled, * then make the check up front and then exit. */ - if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){ - /* nonblock mode is set or port is not enabled */ + if (test_bit(TTY_IO_ERROR, &tty->flags)) { + port->flags |= ASYNC_NORMAL_ACTIVE; + return 0; + } + + if (filp->f_flags & O_NONBLOCK) { + /* nonblock mode is set */ + if (tty->termios.c_cflag & CBAUD) + tty_port_raise_dtr_rts(port); + port->flags |= ASYNC_NORMAL_ACTIVE; IRDA_DEBUG(1, "%s(), O_NONBLOCK requested!\n", __func__ ); return 0; -- cgit v1.2.3-55-g7522 From 9b99b7e90bf5cd9a88823c49574ba80d92a98262 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 5 Mar 2013 08:04:57 +0000 Subject: pkt_sched: sch_qfq: properly cap timestamps in charge_actual_service QFQ+ schedules the active aggregates in a group using a bucket list (one list per group). The bucket in which each aggregate is inserted depends on the aggregate's timestamps, and the number of buckets in a group is enough to accommodate the possible (range of) values of the timestamps of all the aggregates in the group. For this property to hold, timestamps must however be computed correctly. One necessary condition for computing timestamps correctly is that the number of bits dequeued for each aggregate, while the aggregate is in service, does not exceed the maximum budget budgetmax assigned to the aggregate. For each aggregate, budgetmax is proportional to the number of classes in the aggregate. If the number of classes of the aggregate is decreased through qfq_change_class(), then budgetmax is decreased automatically as well. Problems may occur if the aggregate is in service when budgetmax is decreased, because the current remaining budget of the aggregate and/or the service already received by the aggregate may happen to be larger than the new value of budgetmax. In this case, when the aggregate is eventually deselected and its timestamps are updated, the aggregate may happen to have received an amount of service larger than budgetmax.
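A made-up numeric illustration: with four classes of lmax = 1000 each, budgetmax = 4 * 1000 = 4000 (budgetmax is new_num_classes * lmax, per the diff below); if the aggregate entered service with initial_budget = 4000 and has consumed 3500 of it when one class is removed, budgetmax drops to 3000 while the uncapped computation still records

    u32 budgetmax = 3000;                    /* 3 remaining classes * lmax */
    u32 initial_budget = 4000, budget = 500;
    u32 service_received = initial_budget - budget;   /* 3500 > budgetmax */
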
This may cause the aggregate to be assigned a higher virtual finish time than the maximum acceptable value for the last bucket in the bucket list of the group. This fix introduces a cap that addresses this issue. Signed-off-by: Paolo Valente Reviewed-by: Fabio Checconi Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index e9a77f621c3d..489edd929932 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -298,6 +298,10 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg, new_num_classes == q->max_agg_classes - 1) /* agg no more full */ hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs); + /* The next assignment may let + * agg->initial_budget > agg->budgetmax + * hold, we will take it into account in charge_actual_service(). + */ agg->budgetmax = new_num_classes * agg->lmax; new_agg_weight = agg->class_weight * new_num_classes; agg->inv_w = ONE_FP/new_agg_weight; @@ -988,8 +992,13 @@ static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg, /* Update F according to the actual service received by the aggregate. */ static inline void charge_actual_service(struct qfq_aggregate *agg) { - /* compute the service received by the aggregate */ - u32 service_received = agg->initial_budget - agg->budget; + /* Compute the service received by the aggregate, taking into + * account that, after decreasing the number of classes in + * agg, it may happen that + * agg->initial_budget - agg->budget > agg->budgetmax + */ + u32 service_received = min(agg->budgetmax, + agg->initial_budget - agg->budget); agg->F = agg->S + (u64)service_received * agg->inv_w; } -- cgit v1.2.3-55-g7522 From 624b85fb96206879c8146abdba071222bbd25259 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 5 Mar 2013 08:04:58 +0000 Subject: pkt_sched: sch_qfq: fix the update of eligible-group sets Between two invocations of make_eligible, the system virtual time may happen to grow enough that, in its binary representation, a bit with higher order than 31 flips. This happens especially with TSO/GSO. Before this fix, the mask used in make_eligible was computed as (1UL << fls(vslot ^ old_vslot)) - 1, which is not well defined if the index of the flipped bit is > 31. The fix just replaces 1UL with 1ULL. Signed-off-by: Paolo Valente Reviewed-by: Fabio Checconi Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 489edd929932..39d85cc0db26 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -821,7 +821,7 @@ static void qfq_make_eligible(struct qfq_sched *q) unsigned long old_vslot = q->oldV >> q->min_slot_shift; if (vslot != old_vslot) { - unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1; + unsigned long mask = (1ULL << fls(vslot ^ old_vslot)) - 1; qfq_move_groups(q, mask, IR, ER); qfq_move_groups(q, mask, IB, EB); } -- cgit v1.2.3-55-g7522 From 2f3b89a1fe0823fceb544856c9eeb036a75ff091 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 5 Mar 2013 08:04:59 +0000 Subject: pkt_sched: sch_qfq: serve activated aggregates immediately if the scheduler is empty If no aggregate is in service, then the function qfq_dequeue() does not dequeue any packet. For this reason, to guarantee QFQ+ to be work conserving, a just-activated aggregate must be set as in service immediately if it happens to be the only active aggregate. This is done by the function qfq_enqueue().
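Schematically, the operation in question amounts to the following (condensed from the new qfq_activate_agg() in the diff below):

    if (q->in_serv_agg == NULL) {           /* no aggregate in service or scheduled */
            q->in_serv_agg = agg;           /* start serving this one immediately */
            q->oldV = q->V = agg->S;        /* to be in service, it must be eligible */
    } else if (agg != q->in_serv_agg)
            qfq_schedule_agg(q, agg);       /* otherwise queue it for later service */
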
Unfortunately, the function qfq_add_to_agg(), used to add a class to an aggregate, does not perform this important additional operation. In particular, if: 1) qfq_add_to_agg() is invoked to complete the move of a class from a source aggregate, becoming, for this move, inactive, to a destination aggregate, becoming instead active, and 2) the destination aggregate becomes the only active aggregate, then this aggregate is not however set as in service. QFQ+ then remains in a non-work-conserving state until a new invocation of qfq_enqueue() recovers the situation. This fix solves the problem by moving the logic for setting an aggregate as in service directly into the function qfq_activate_agg(). Hence, from whatever point qfq_activate_agg() is invoked, QFQ+ remains work conserving. Since the more complex logic of this new version of qfq_activate_agg() is not needed in qfq_dequeue() to reschedule an aggregate that finishes its budget, such an aggregate is now rescheduled there by directly invoking the needed functions. Signed-off-by: Paolo Valente Reviewed-by: Fabio Checconi Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 39d85cc0db26..8c5172c7f193 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1003,6 +1003,12 @@ static inline void charge_actual_service(struct qfq_aggregate *agg) agg->F = agg->S + (u64)service_received * agg->inv_w; } +static inline void qfq_update_agg_ts(struct qfq_sched *q, + struct qfq_aggregate *agg, + enum update_reason reason); + +static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg); + static struct sk_buff *qfq_dequeue(struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); @@ -1030,7 +1036,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) in_serv_agg->initial_budget = in_serv_agg->budget = in_serv_agg->budgetmax; - if (!list_empty(&in_serv_agg->active)) + if (!list_empty(&in_serv_agg->active)) { /* * Still active: reschedule for * service. Possible optimization: if no other * handle it, we would need to maintain an * extra num_active_aggs field. */ - qfq_activate_agg(q, in_serv_agg, requeue); - else if (sch->q.qlen == 0) { /* no aggregate to serve */ + qfq_update_agg_ts(q, in_serv_agg, requeue); + qfq_schedule_agg(q, in_serv_agg); + } else if (sch->q.qlen == 0) { /* no aggregate to serve */ q->in_serv_agg = NULL; return NULL; } @@ -1226,17 +1233,11 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->deficit = agg->lmax; list_add_tail(&cl->alist, &agg->active); - if (list_first_entry(&agg->active, struct qfq_class, alist) != cl) - return err; /* aggregate was not empty, nothing else to do */ - - /* recharge budget */ - agg->initial_budget = agg->budget = agg->budgetmax; - - qfq_update_agg_ts(q, agg, enqueue); - if (q->in_serv_agg == NULL) - q->in_serv_agg = agg; - else if (agg != q->in_serv_agg) - qfq_schedule_agg(q, agg); + if (list_first_entry(&agg->active, struct qfq_class, alist) != cl || + q->in_serv_agg == agg) + return err; /* non-empty or in service, nothing else to do */ + qfq_activate_agg(q, agg, enqueue); return err; } @@ -1293,8 +1294,15 @@ skip_update: static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg, enum update_reason reason) { + agg->initial_budget = agg->budget = agg->budgetmax; /* recharge budg.
*/ + qfq_update_agg_ts(q, agg, reason); if (q->in_serv_agg == NULL) { /* no aggr. in service or scheduled */ q->in_serv_agg = agg; /* start serving this aggregate */ /* update V: to be in service, agg must be eligible */ q->oldV = q->V = agg->S; } else if (agg != q->in_serv_agg) qfq_schedule_agg(q, agg); } static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp, -- cgit v1.2.3-55-g7522 From a0143efa96671dc51dab9bba776a66f9bfa1757f Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 5 Mar 2013 08:05:00 +0000 Subject: pkt_sched: sch_qfq: prevent budget from wrapping around after a dequeue Aggregate budgets are computed so as to guarantee that, after an aggregate has been selected for service, that aggregate has enough budget to serve at least one maximum-size packet for the classes it contains. For this reason, after a new aggregate has been selected for service, its next packet is immediately dequeued, without any further control. The maximum packet size for a class, lmax, can be changed through qfq_change_class(). In case the user sets lmax to a lower value than the size of some of the still-to-arrive packets, QFQ+ will automatically push up lmax as it enqueues these packets. This automatic push up is likely to happen with TSO/GSO. In any case, if lmax is assigned a lower value than the size of some of the packets already enqueued for the class, then the following problem may occur: the size of the next packet to dequeue for the class may happen to be larger than lmax, after the aggregate to which the class belongs has just been selected for service. In this case, even the budget of the aggregate, which is an unsigned value, may be lower than the size of the next packet to dequeue. After dequeueing this packet and subtracting its size from the budget, the latter would wrap around. This fix prevents the budget from wrapping around after any packet dequeue. Signed-off-by: Paolo Valente Reviewed-by: Fabio Checconi Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 8c5172c7f193..c34af93ddfe2 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1068,7 +1068,15 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) qdisc_bstats_update(sch, skb); agg_dequeue(in_serv_agg, cl, len); - in_serv_agg->budget -= len; + /* If lmax is lowered, through qfq_change_class, for a class + * owning pending packets with larger size than the new value + * of lmax, then the following condition may hold. + */ + if (unlikely(in_serv_agg->budget < len)) + in_serv_agg->budget = 0; + else + in_serv_agg->budget -= len; + q->V += (u64)len * IWSUM; pr_debug("qfq dequeue: len %u F %lld now %lld\n", len, (unsigned long long) in_serv_agg->F, -- cgit v1.2.3-55-g7522 From 40dd2d546198e7bbb8d3fe718957b158caa3fe52 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 5 Mar 2013 08:05:01 +0000 Subject: pkt_sched: sch_qfq: do not allow virtual time to jump if an aggregate is in service By definition of (the algorithm of) QFQ+, the system virtual time must be pushed up only if there is no 'eligible' aggregate, i.e. no aggregate that would have started to be served also in the ideal system emulated by QFQ+. QFQ+ serves only eligible aggregates, hence the aggregate currently in service is eligible.
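Here "eligible" means that the aggregate's virtual start time S is not ahead of the system virtual time V; sch_qfq.c expresses the comparison in wraparound-safe 64-bit arithmetic, essentially:

    static inline int qfq_gt(u64 a, u64 b)
    {
            return (s64)(a - b) > 0;
    }

    /* an aggregate is eligible iff !qfq_gt(agg->S, q->V), i.e. S <= V */
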
As a consequence, to decide whether there is no eligible aggregate, QFQ+ must also check whether there is no aggregate in service. Signed-off-by: Paolo Valente Reviewed-by: Fabio Checconi Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index c34af93ddfe2..6b7ce803d07c 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1279,7 +1279,8 @@ static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg) /* group was surely ineligible, remove */ __clear_bit(grp->index, &q->bitmaps[IR]); __clear_bit(grp->index, &q->bitmaps[IB]); - } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V)) + } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) && + q->in_serv_agg == NULL) q->V = roundedS; grp->S = roundedS; -- cgit v1.2.3-55-g7522 From 76e4cb0d3a583900f844f1704b19b7e8c5df8837 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 5 Mar 2013 08:05:02 +0000 Subject: pkt_sched: sch_qfq: remove a useless invocation of qfq_update_eligible QFQ+ can select for service only 'eligible' aggregates, i.e., aggregates that would have started to be served also in the emulated ideal system. As a consequence, for QFQ+ to be work conserving, at least one of the active aggregates must be eligible when it is time to choose the next aggregate to serve. The set of eligible aggregates is updated through the function qfq_update_eligible(), which does guarantee that, after its invocation, at least one of the active aggregates is eligible. Because of this property, this function is invoked in qfq_deactivate_agg() to guarantee that at least one of the active aggregates is still eligible after an aggregate has been deactivated. In particular, the critical case is when there are other active aggregates, but the aggregate being deactivated happens to be the only one eligible. However, this precaution is not needed for QFQ+ to be work conserving, because update_eligible() is always invoked also at the beginning of qfq_choose_next_agg(). This patch removes the additional invocation of update_eligible() in qfq_deactivate_agg(). Signed-off-by: Paolo Valente Reviewed-by: Fabio Checconi Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 6b7ce803d07c..d51852bba01c 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1383,8 +1383,6 @@ static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg) __set_bit(grp->index, &q->bitmaps[s]); } } - - qfq_update_eligible(q); } static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) -- cgit v1.2.3-55-g7522 From 691b3b7e1329141acf1e5ed44d8b468cea065fe3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 4 Mar 2013 12:32:43 +0000 Subject: net: fix new kernel-doc warnings in net core Fix new kernel-doc warnings in net/core/dev.c: Warning(net/core/dev.c:4788): No description found for parameter 'new_carrier' Warning(net/core/dev.c:4788): Excess function parameter 'new_carries' description in 'dev_change_carrier' Signed-off-by: Randy Dunlap Signed-off-by: David S. 
Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a06a7a58dd11..5ca8734ae63a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4780,7 +4780,7 @@ EXPORT_SYMBOL(dev_set_mac_address); /** * dev_change_carrier - Change device carrier * @dev: device - * @new_carries: new value + * @new_carrier: new value * * Change device carrier */ -- cgit v1.2.3-55-g7522 From d1f41b67ff7735193bc8b418b98ac99a448833e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Mar 2013 07:15:13 +0000 Subject: net: reduce net_rx_action() latency to 2 HZ We should use time_after_eq() to get maximum latency of two ticks, instead of three. Bug added in commit 24f8b2385 (net: increase receive packet quantum) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 5ca8734ae63a..8f152f904f70 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4103,7 +4103,7 @@ static void net_rx_action(struct softirq_action *h) * Allow this to run for 2 jiffies since which will allow * an average latency of 1.5/HZ. */ - if (unlikely(budget <= 0 || time_after(jiffies, time_limit))) + if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) goto softnet_break; local_irq_enable(); -- cgit v1.2.3-55-g7522 From fa2b04f4502d74659e4e4b1294c6d88e08ece032 Mon Sep 17 00:00:00 2001 From: David Ward Date: Tue, 5 Mar 2013 17:06:32 +0000 Subject: net/ipv4: Timestamp option cannot overflow with prespecified addresses When a router forwards a packet that contains the IPv4 timestamp option, if there is no space left in the option for the router to add its own timestamp, then the router increments the Overflow value in the option. However, if the addresses of the routers are prespecified in the option, then the overflow condition cannot happen: the option is structured so that each prespecified router has a place to write its timestamp. Other routers do not add a timestamp, so there will never be a lack of space. This fix ensures that the Overflow value in the IPv4 timestamp option is not incremented when the addresses of the routers are prespecified, even if the Pointer value is greater than the Length value. Signed-off-by: David Ward Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index f6289bf6f332..310a3647c83d 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -423,7 +423,7 @@ int ip_options_compile(struct net *net, put_unaligned_be32(midtime, timeptr); opt->is_changed = 1; } - } else { + } else if ((optptr[3]&0xF) != IPOPT_TS_PRESPEC) { unsigned int overflow = optptr[3]>>4; if (overflow == 15) { pp_ptr = optptr + 3; -- cgit v1.2.3-55-g7522 From b141e811a0763bb107af4cd99d456193ccdb8053 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 11:04:45 +0100 Subject: NFC: llcp: Decrease socket ack log when accepting a connection This is really difficult to test with real NFC devices, but without this fix an LLCP server will eventually refuse new connections. 
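The refusal comes from the accept-queue accounting: each connection queued on a listening socket bumps sk_ack_backlog, and every accept() must decrement it again, otherwise sk_acceptq_is_full() on the listener eventually remains true forever. The helpers involved are essentially these (from include/net/sock.h of this era):

    static inline void sk_acceptq_removed(struct sock *sk)
    {
            sk->sk_ack_backlog--;
    }

    static inline bool sk_acceptq_is_full(const struct sock *sk)
    {
            return sk->sk_ack_backlog > sk->sk_max_ack_backlog;
    }
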
Signed-off-by: Samuel Ortiz --- net/nfc/llcp/sock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 5332751943a9..5c7cdf3f2a83 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -278,6 +278,8 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, pr_debug("Returning sk state %d\n", sk->sk_state); + sk_acceptq_removed(parent); + return sk; } -- cgit v1.2.3-55-g7522 From 3536da06db0baa675f32de608c0a4c0f5ef0e9ff Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 15:40:04 +0100 Subject: NFC: llcp: Clean local timers and works when removing a device Whenever an adapter is removed we must clean all the local structures, especially the timers and scheduled work. Otherwise those asynchronous threads will eventually try to access the freed nfc_dev pointer if an LLCP link is up. Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 7f8266dd14cb..77e1d97b3996 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -142,20 +142,25 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) return local; } -static void local_release(struct kref *ref) +static void local_cleanup(struct nfc_llcp_local *local, bool listen) { - struct nfc_llcp_local *local; - - local = container_of(ref, struct nfc_llcp_local, ref); - - list_del(&local->list); - nfc_llcp_socket_release(local, false); + nfc_llcp_socket_release(local, listen); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); cancel_work_sync(&local->tx_work); cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); +} + +static void local_release(struct kref *ref) +{ + struct nfc_llcp_local *local; + + local = container_of(ref, struct nfc_llcp_local, ref); + + list_del(&local->list); + local_cleanup(local, false); kfree(local); } @@ -1427,6 +1432,8 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev) return; } + local_cleanup(local, false); + nfc_llcp_local_put(local); } -- cgit v1.2.3-55-g7522 From e6a3a4bb856a6fba551b43376c80f45836132710 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 16:33:30 +0100 Subject: NFC: llcp: Clean raw sockets from nfc_llcp_socket_release Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 77e1d97b3996..8a35423ead54 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -133,6 +133,35 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) } write_unlock(&local->sockets.lock); + + /* + * If we want to keep the listening sockets alive, + * we don't touch the RAW ones. 
+ */ + if (listen == true) + return; + + write_lock(&local->raw_sockets.lock); + + sk_for_each_safe(sk, tmp, &local->raw_sockets.head) { + llcp_sock = nfc_llcp_sock(sk); + + bh_lock_sock(sk); + + nfc_llcp_socket_purge(llcp_sock); + + sk->sk_state = LLCP_CLOSED; + + sk->sk_state_change(sk); + + bh_unlock_sock(sk); + + sock_orphan(sk); + + sk_del_node_init(sk); + } + + write_unlock(&local->raw_sockets.lock); } struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) -- cgit v1.2.3-55-g7522 From 3bbc0ceb7ac2bf694d31362eea2c71a680e5deeb Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 17:01:06 +0100 Subject: NFC: llcp: Report error to pending sockets when a device is removed Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 8a35423ead54..b530afadd76c 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -68,7 +68,8 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) } } -static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) +static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, + int err) { struct sock *sk; struct hlist_node *tmp; @@ -100,7 +101,10 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) nfc_llcp_accept_unlink(accept_sk); + if (err) + accept_sk->sk_err = err; accept_sk->sk_state = LLCP_CLOSED; + accept_sk->sk_state_change(sk); bh_unlock_sock(accept_sk); @@ -123,7 +127,10 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) continue; } + if (err) + sk->sk_err = err; sk->sk_state = LLCP_CLOSED; + sk->sk_state_change(sk); bh_unlock_sock(sk); @@ -150,8 +157,9 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) nfc_llcp_socket_purge(llcp_sock); + if (err) + sk->sk_err = err; sk->sk_state = LLCP_CLOSED; - sk->sk_state_change(sk); bh_unlock_sock(sk); @@ -173,7 +181,7 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) static void local_cleanup(struct nfc_llcp_local *local, bool listen) { - nfc_llcp_socket_release(local, listen); + nfc_llcp_socket_release(local, listen, ENXIO); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); cancel_work_sync(&local->tx_work); @@ -1382,7 +1390,7 @@ void nfc_llcp_mac_is_down(struct nfc_dev *dev) return; /* Close and purge all existing sockets */ - nfc_llcp_socket_release(local, true); + nfc_llcp_socket_release(local, true, 0); } void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, -- cgit v1.2.3-55-g7522
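A closing note on the error convention used in the last patch: sk->sk_err stores a positive errno value (ENXIO here), and the socket core negates it when reporting the error to callers, essentially via (from include/net/sock.h of this era):

    static inline int sock_error(struct sock *sk)
    {
            int err;
            if (likely(!sk->sk_err))
                    return 0;
            err = xchg(&sk->sk_err, 0);
            return -err;
    }

This is why the patch assigns ENXIO rather than -ENXIO to sk->sk_err.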