From e5f06f720eff24e32f1cc08ec03bcc8c4b2d2934 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky
Date: Wed, 11 Apr 2012 15:13:28 +0400
Subject: nfsd: make expkey cache allocated per network namespace context

This patch also changes the svcauth_unix_purge() function: a network namespace
is added as a parameter, so the loop over all networks is replaced by a single
call that purges the ip map cache.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcauth_unix.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 521d8f7dc833..9c3b9f014468 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -346,17 +346,12 @@ static inline int ip_map_update(struct net *net, struct ip_map *ipm,
 	return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
 }
 
-
-void svcauth_unix_purge(void)
+void svcauth_unix_purge(struct net *net)
 {
-	struct net *net;
-
-	for_each_net(net) {
-		struct sunrpc_net *sn;
+	struct sunrpc_net *sn;
 
-		sn = net_generic(net, sunrpc_net_id);
-		cache_purge(sn->ip_map_cache);
-	}
+	sn = net_generic(net, sunrpc_net_id);
+	cache_purge(sn->ip_map_cache);
 }
 EXPORT_SYMBOL_GPL(svcauth_unix_purge);
 
-- 
cgit v1.2.3-55-g7522


From fc2952a2a9f724f7d200c69a2b059ec331a80363 Mon Sep 17 00:00:00 2001
From: Simo Sorce
Date: Tue, 17 Apr 2012 09:39:06 -0400
Subject: SUNRPC: split upcall function to extract reusable parts

This is needed to share code between the current server upcall mechanism
and the new gssproxy upcall mechanism introduced in a following patch.

Signed-off-by: Simo Sorce <simo@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/auth_gss/svcauth_gss.c | 100 ++++++++++++++++++++++++--------------
 1 file changed, 64 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 1600cfb1618c..f0a0cd4470b7 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -964,16 +964,17 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
 }
 
 static inline int
-gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp, struct rsi *rsip)
+gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp,
+		struct xdr_netobj *out_handle, int *major_status)
 {
 	struct rsc *rsci;
 	int        rc;
 
-	if (rsip->major_status != GSS_S_COMPLETE)
+	if (*major_status != GSS_S_COMPLETE)
 		return gss_write_null_verf(rqstp);
-	rsci = gss_svc_searchbyctx(cd, &rsip->out_handle);
+	rsci = gss_svc_searchbyctx(cd, out_handle);
 	if (rsci == NULL) {
-		rsip->major_status = GSS_S_NO_CONTEXT;
+		*major_status = GSS_S_NO_CONTEXT;
 		return gss_write_null_verf(rqstp);
 	}
 	rc = gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN);
@@ -981,22 +982,13 @@ gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp, struct rsi
 	return rc;
 }
 
-/*
- * Having read the cred already and found we're in the context
- * initiation case, read the verifier and initiate (or check the results
- * of) upcalls to userspace for help with context initiation.  If
- * the upcall results are available, write the verifier and result.
- * Otherwise, drop the request pending an answer to the upcall.
- */
-static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
-			struct rpc_gss_wire_cred *gc, __be32 *authp)
+static inline int
+gss_read_verf(struct rpc_gss_wire_cred *gc,
+	      struct kvec *argv, __be32 *authp,
+	      struct xdr_netobj *in_handle,
+	      struct xdr_netobj *in_token)
 {
-	struct kvec *argv = &rqstp->rq_arg.head[0];
-	struct kvec *resv = &rqstp->rq_res.head[0];
 	struct xdr_netobj tmpobj;
-	struct rsi *rsip, rsikey;
-	int ret;
-	struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
 
 	/* Read the verifier; should be NULL: */
 	*authp = rpc_autherr_badverf;
@@ -1006,24 +998,67 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
 		return SVC_DENIED;
 	if (svc_getnl(argv) != 0)
 		return SVC_DENIED;
-
 	/* Martial context handle and token for upcall: */
 	*authp = rpc_autherr_badcred;
 	if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0)
 		return SVC_DENIED;
-	memset(&rsikey, 0, sizeof(rsikey));
-	if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
+	if (dup_netobj(in_handle, &gc->gc_ctx))
 		return SVC_CLOSE;
 	*authp = rpc_autherr_badverf;
 	if (svc_safe_getnetobj(argv, &tmpobj)) {
-		kfree(rsikey.in_handle.data);
+		kfree(in_handle->data);
 		return SVC_DENIED;
 	}
-	if (dup_netobj(&rsikey.in_token, &tmpobj)) {
-		kfree(rsikey.in_handle.data);
+	if (dup_netobj(in_token, &tmpobj)) {
+		kfree(in_handle->data);
 		return SVC_CLOSE;
 	}
 
+	return 0;
+}
+
+static inline int
+gss_write_resv(struct kvec *resv, size_t size_limit,
+	       struct xdr_netobj *out_handle, struct xdr_netobj *out_token,
+	       int major_status, int minor_status)
+{
+	if (resv->iov_len + 4 > size_limit)
+		return -1;
+	svc_putnl(resv, RPC_SUCCESS);
+	if (svc_safe_putnetobj(resv, out_handle))
+		return -1;
+	if (resv->iov_len + 3 * 4 > size_limit)
+		return -1;
+	svc_putnl(resv, major_status);
+	svc_putnl(resv, minor_status);
+	svc_putnl(resv, GSS_SEQ_WIN);
+	if (svc_safe_putnetobj(resv, out_token))
+		return -1;
+	return 0;
+}
+
+/*
+ * Having read the cred already and found we're in the context
+ * initiation case, read the verifier and initiate (or check the results
+ * of) upcalls to userspace for help with context initiation.  If
+ * the upcall results are available, write the verifier and result.
+ * Otherwise, drop the request pending an answer to the upcall.
+ */
+static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
+			struct rpc_gss_wire_cred *gc, __be32 *authp)
+{
+	struct kvec *argv = &rqstp->rq_arg.head[0];
+	struct kvec *resv = &rqstp->rq_res.head[0];
+	struct rsi *rsip, rsikey;
+	int ret;
+	struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
+
+	memset(&rsikey, 0, sizeof(rsikey));
+	ret = gss_read_verf(gc, argv, authp,
+			    &rsikey.in_handle, &rsikey.in_token);
+	if (ret)
+		return ret;
+
 	/* Perform upcall, or find upcall result: */
 	rsip = rsi_lookup(sn->rsi_cache, &rsikey);
 	rsi_free(&rsikey);
@@ -1035,19 +1070,12 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
 
 	ret = SVC_CLOSE;
 	/* Got an answer to the upcall; use it: */
-	if (gss_write_init_verf(sn->rsc_cache, rqstp, rsip))
+	if (gss_write_init_verf(sn->rsc_cache, rqstp,
+				&rsip->out_handle, &rsip->major_status))
 		goto out;
-	if (resv->iov_len + 4 > PAGE_SIZE)
-		goto out;
-	svc_putnl(resv, RPC_SUCCESS);
-	if (svc_safe_putnetobj(resv, &rsip->out_handle))
-		goto out;
-	if (resv->iov_len + 3 * 4 > PAGE_SIZE)
-		goto out;
-	svc_putnl(resv, rsip->major_status);
-	svc_putnl(resv, rsip->minor_status);
-	svc_putnl(resv, GSS_SEQ_WIN);
-	if (svc_safe_putnetobj(resv, &rsip->out_token))
+	if (gss_write_resv(resv, PAGE_SIZE,
+			   &rsip->out_handle, &rsip->out_token,
+			   rsip->major_status, rsip->minor_status))
 		goto out;
 
 	ret = SVC_COMPLETE;
-- 
cgit v1.2.3-55-g7522


From f0d1b3c2bcc5de8a17af5f2274f7fcde8292b5fc Mon Sep 17 00:00:00 2001
From: Joe Perches
Date: Thu, 24 May 2012 07:13:30 +0000
Subject: net/wanrouter: Deprecate and schedule for removal

No one uses this on current kernels anymore.

Let it be known it's going to be removed eventually.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/feature-removal-schedule.txt | 6 ++++++
 net/wanrouter/Kconfig                      | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 50d82ae09e2a..6d3dcd4eb649 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -588,3 +588,9 @@ Why:	Remount currently allows changing bound subsystems and
 	replaced with conventional fsnotify.
 
 ----------------------------
+
+What:	net/wanrouter/
+When:	June 2013
+Why:	Unsupported/unmaintained/unused since 2.6
+
+----------------------------
diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig
index 61ceae0b9566..a157a2e64e18 100644
--- a/net/wanrouter/Kconfig
+++ b/net/wanrouter/Kconfig
@@ -3,7 +3,7 @@
 #
 
 config WAN_ROUTER
-	tristate "WAN router"
+	tristate "WAN router (DEPRECATED)"
 	depends on EXPERIMENTAL
 	---help---
 	  Wide Area Networks (WANs), such as X.25, frame relay and leased
-- 
cgit v1.2.3-55-g7522


From 992e68bf2eb9f3319f098560b8d4a03aa52fd7b8 Mon Sep 17 00:00:00 2001
From: Soumik Das
Date: Sun, 20 May 2012 15:31:13 +0530
Subject: mac80211: Fix race in checking AP status by sending null frame

mac80211 tries to verify the existence of the current AP by
probing or sending a NULL frame in function
ieee80211_mgd_probe_ap_send. It first sends a null frame to the AP,
increments probe_send_count and waits for the ACK to the NULL
frame for a finite duration of time. At times, it happens that by
the time mac80211 gets to increment probe_send_count, the ACK for
the NULL frame transmitted has already been processed. This leads
to a race condition where mac80211 times out waiting for the ACK
for the NULL frame causing unnecessary disconnection with the AP.

Signed-off-by: Soumik Das <soumik.das@stericsson.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b3b3c264ff66..04c306308987 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1522,6 +1522,8 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 	 * anymore. The timeout will be reset if the frame is ACKed by
 	 * the AP.
 	 */
+	ifmgd->probe_send_count++;
+
 	if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
 		ifmgd->nullfunc_failed = false;
 		ieee80211_send_nullfunc(sdata->local, sdata, 0);
@@ -1538,7 +1540,6 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 					 0, (u32) -1, true, false);
 	}
 
-	ifmgd->probe_send_count++;
 	ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
 	run_again(ifmgd, ifmgd->probe_timeout);
 	if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
-- 
cgit v1.2.3-55-g7522


From 91657eafb64b4cb53ec3a2fbc4afc3497f735788 Mon Sep 17 00:00:00 2001
From: Benjamin Poirier
Date: Thu, 24 May 2012 11:32:38 +0000
Subject: xfrm: take net hdr len into account for esp payload size calculation

Corrects the function that determines the esp payload size. The calculations
done in esp{4,6}_get_mtu() lead to overlength frames in transport mode for
certain mtu values and suboptimal frames for others.

According to what is done, mainly in esp{,6}_output() and tcp_mtu_to_mss(),
net_header_len must be taken into account before doing the alignment
calculation.

Signed-off-by: Benjamin Poirier <bpoirier@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/esp4.c | 24 +++++++++---------------
 net/ipv6/esp6.c | 18 +++++++-----------
 2 files changed, 16 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 89a47b35905d..cb982a61536f 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -459,28 +459,22 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
 	struct esp_data *esp = x->data;
 	u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
 	u32 align = max_t(u32, blksize, esp->padlen);
-	u32 rem;
-
-	mtu -= x->props.header_len + crypto_aead_authsize(esp->aead);
-	rem = mtu & (align - 1);
-	mtu &= ~(align - 1);
+	unsigned int net_adj;
 
 	switch (x->props.mode) {
-	case XFRM_MODE_TUNNEL:
-		break;
-	default:
 	case XFRM_MODE_TRANSPORT:
-		/* The worst case */
-		mtu -= blksize - 4;
-		mtu += min_t(u32, blksize - 4, rem);
-		break;
 	case XFRM_MODE_BEET:
-		/* The worst case. */
-		mtu += min_t(u32, IPV4_BEET_PHMAXLEN, rem);
+		net_adj = sizeof(struct iphdr);
 		break;
+	case XFRM_MODE_TUNNEL:
+		net_adj = 0;
+		break;
+	default:
+		BUG();
 	}
 
-	return mtu - 2;
+	return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
+		 net_adj) & ~(align - 1)) + (net_adj - 2);
 }
 
 static void esp4_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 1e62b7557b00..db1521fcda5b 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -413,19 +413,15 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
 	struct esp_data *esp = x->data;
 	u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
 	u32 align = max_t(u32, blksize, esp->padlen);
-	u32 rem;
+	unsigned int net_adj;
 
-	mtu -= x->props.header_len + crypto_aead_authsize(esp->aead);
-	rem = mtu & (align - 1);
-	mtu &= ~(align - 1);
-
-	if (x->props.mode != XFRM_MODE_TUNNEL) {
-		u32 padsize = ((blksize - 1) & 7) + 1;
-		mtu -= blksize - padsize;
-		mtu += min_t(u32, blksize - padsize, rem);
-	}
+	if (x->props.mode != XFRM_MODE_TUNNEL)
+		net_adj = sizeof(struct ipv6hdr);
+	else
+		net_adj = 0;
 
-	return mtu - 2;
+	return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
+		 net_adj) & ~(align - 1)) + (net_adj - 2);
 }
 
 static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-- 
cgit v1.2.3-55-g7522


From 0c1833797a5a6ec23ea9261d979aa18078720b74 Mon Sep 17 00:00:00 2001
From: Gao feng
Date: Sat, 26 May 2012 01:30:53 +0000
Subject: ipv6: fix incorrect ipsec fragment

Since commit ad0081e43a
"ipv6: Fragment locally generated tunnel-mode IPSec6 packets as needed"
packets are fragmented incorrectly, because tunnel mode needs the IPsec
headers and trailer on every fragment, while in transport mode it is
sufficient to add the headers to the first fragment and the trailer to
the last.

So modify mtu and maxfraglen based on the IPsec mode and on whether the
fragment is the first or the last.

In my tests it works well (every fragment's size is the mtu) and does not
trigger the slow fragment path.

Changes from v1:
	through optimization, mtu_prev and maxfraglen_prev can be deleted.
	replace the xfrm mode checks with dst_entry's new flag DST_XFRM_TUNNEL.
	add function ip6_append_data_mtu to make the code clearer.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h      |  1 +
 net/ipv6/ip6_output.c  | 68 +++++++++++++++++++++++++++++++++++++-------------
 net/xfrm/xfrm_policy.c |  3 +++
 3 files changed, 54 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/include/net/dst.h b/include/net/dst.h
index bed833d9796a..8197eadca819 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -60,6 +60,7 @@ struct dst_entry {
 #define DST_NOCOUNT		0x0020
 #define DST_NOPEER		0x0040
 #define DST_FAKE_RTABLE		0x0080
+#define DST_XFRM_TUNNEL		0x0100
 
 	short			error;
 	short			obsolete;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d99fdc699625..17b8c67998bb 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1187,6 +1187,29 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
 	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
 }
 
+static void ip6_append_data_mtu(int *mtu,
+				int *maxfraglen,
+				unsigned int fragheaderlen,
+				struct sk_buff *skb,
+				struct rt6_info *rt)
+{
+	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
+		if (skb == NULL) {
+			/* first fragment, reserve header_len */
+			*mtu = *mtu - rt->dst.header_len;
+
+		} else {
+			/*
+			 * this fragment is not first, the headers
+			 * space is regarded as data space.
+			 */
+			*mtu = dst_mtu(rt->dst.path);
+		}
+		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
+			      + fragheaderlen - sizeof(struct frag_hdr);
+	}
+}
+
 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	int offset, int len, int odd, struct sk_buff *skb),
 	void *from, int length, int transhdrlen,
@@ -1196,7 +1219,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct inet_cork *cork;
-	struct sk_buff *skb;
+	struct sk_buff *skb, *skb_prev = NULL;
 	unsigned int maxfraglen, fragheaderlen;
 	int exthdrlen;
 	int dst_exthdrlen;
@@ -1253,8 +1276,12 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		inet->cork.fl.u.ip6 = *fl6;
 		np->cork.hop_limit = hlimit;
 		np->cork.tclass = tclass;
-		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
-		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
+		if (rt->dst.flags & DST_XFRM_TUNNEL)
+			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
+			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
+		else
+			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
+			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
 		if (np->frag_size < mtu) {
 			if (np->frag_size)
 				mtu = np->frag_size;
@@ -1350,25 +1377,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 			unsigned int fraglen;
 			unsigned int fraggap;
 			unsigned int alloclen;
-			struct sk_buff *skb_prev;
 alloc_new_skb:
-			skb_prev = skb;
-
 			/* There's no room in the current skb */
-			if (skb_prev)
-				fraggap = skb_prev->len - maxfraglen;
+			if (skb)
+				fraggap = skb->len - maxfraglen;
 			else
 				fraggap = 0;
+			/* update mtu and maxfraglen if necessary */
+			if (skb == NULL || skb_prev == NULL)
+				ip6_append_data_mtu(&mtu, &maxfraglen,
+						    fragheaderlen, skb, rt);
+
+			skb_prev = skb;
 
 			/*
 			 * If remaining data exceeds the mtu,
 			 * we know we need more fragment(s).
 			 */
 			datalen = length + fraggap;
-			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
-				datalen = maxfraglen - fragheaderlen;
 
-			fraglen = datalen + fragheaderlen;
+			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
+				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
 			if ((flags & MSG_MORE) &&
 			    !(rt->dst.dev->features&NETIF_F_SG))
 				alloclen = mtu;
@@ -1377,13 +1406,16 @@ alloc_new_skb:
 
 			alloclen += dst_exthdrlen;
 
-			/*
-			 * The last fragment gets additional space at tail.
-			 * Note: we overallocate on fragments with MSG_MODE
-			 * because we have no idea if we're the last one.
-			 */
-			if (datalen == length + fraggap)
-				alloclen += rt->dst.trailer_len;
+			if (datalen != length + fraggap) {
+				/*
+				 * this is not the last fragment, the trailer
+				 * space is regarded as data space.
+				 */
+				datalen += rt->dst.trailer_len;
+			}
+
+			alloclen += rt->dst.trailer_len;
+			fraglen = datalen + fragheaderlen;
 
 			/*
 			 * We just reserve space for fragment header.
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c53e8f42aa75..ccfbd328a69d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1921,6 +1921,9 @@ no_transform:
 	}
 ok:
 	xfrm_pols_put(pols, drop_pols);
+	if (dst && dst->xfrm &&
+	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
+		dst->flags |= DST_XFRM_TUNNEL;
 	return dst;
 
 nopol:
-- 
cgit v1.2.3-55-g7522


From c26a0e105cf11fa482116cc59a7c6cd9c01dd61b Mon Sep 17 00:00:00 2001
From: Claudio Pisa
Date: Mon, 28 May 2012 13:06:25 +0100
Subject: mac80211: fix flag check for QoS NOACK frames

Signed-off-by: Claudio Pisa <claudio.pisa@uniroma2.it>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 5f827a6b0d8d..847215bb2a6f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -153,7 +153,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
 
 	/* Don't calculate ACKs for QoS Frames with NoAck Policy set */
 	if (ieee80211_is_data_qos(hdr->frame_control) &&
-	    *(ieee80211_get_qos_ctl(hdr)) | IEEE80211_QOS_CTL_ACK_POLICY_NOACK)
+	    *(ieee80211_get_qos_ctl(hdr)) & IEEE80211_QOS_CTL_ACK_POLICY_NOACK)
 		dur = 0;
 	else
 		/* Time needed to transmit ACK
-- 
cgit v1.2.3-55-g7522


From 7b21aea04d084916ac4e0e8852dcc9cd60ec0d1d Mon Sep 17 00:00:00 2001
From: Eyal Shapira
Date: Tue, 29 May 2012 02:00:22 -0700
Subject: mac80211: fix ADDBA declined after suspend with wowlan

WLAN_STA_BLOCK_BA is set while suspending but doesn't get cleared
when resuming in case of wowlan. This causes further ADDBA requests
received to be rejected. Fix it by clearing it in the wowlan path
as well.

Signed-off-by: Eyal Shapira <eyal@wizery.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Cc: stable@vger.kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/util.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 22f2216b397e..a44c6807df01 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1371,6 +1371,12 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 		}
 	}
 
+	/* add back keys */
+	list_for_each_entry(sdata, &local->interfaces, list)
+		if (ieee80211_sdata_running(sdata))
+			ieee80211_enable_keys(sdata);
+
+ wake_up:
 	/*
 	 * Clear the WLAN_STA_BLOCK_BA flag so new aggregation
 	 * sessions can be established after a resume.
@@ -1392,12 +1398,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 		mutex_unlock(&local->sta_mtx);
 	}
 
-	/* add back keys */
-	list_for_each_entry(sdata, &local->interfaces, list)
-		if (ieee80211_sdata_running(sdata))
-			ieee80211_enable_keys(sdata);
-
- wake_up:
 	ieee80211_wake_queues_by_reason(hw,
 			IEEE80211_QUEUE_STOP_REASON_SUSPEND);
 
-- 
cgit v1.2.3-55-g7522


From c51ce49735c183ef2592db70f918ee698716276b Mon Sep 17 00:00:00 2001
From: James Chapman
Date: Tue, 29 May 2012 03:30:42 +0000
Subject: l2tp: fix oops in L2TP IP sockets for connect() AF_UNSPEC case

An application may call connect() to disconnect a socket using an
address with family AF_UNSPEC. The L2TP IP sockets were not handling
this case when the socket is not bound and an attempt to connect()
using AF_UNSPEC in such cases would result in an oops. This patch
addresses the problem by protecting the sk_prot->disconnect() call
against trying to unhash the socket before it is bound.

The L2TP IPv4 and IPv6 sockets have the same problem. Both are fixed
by this patch.

The patch also adds more checks that the sockaddr supplied to bind()
and connect() calls is valid.

 RIP: 0010:[<ffffffff82e133b0>]  [<ffffffff82e133b0>] inet_unhash+0x50/0xd0
 RSP: 0018:ffff88001989be28  EFLAGS: 00010293
 Stack:
  ffff8800407a8000 0000000000000000 ffff88001989be78 ffffffff82e3a249
  ffffffff82e3a050 ffff88001989bec8 ffff88001989be88 ffff8800407a8000
  0000000000000010 ffff88001989bec8 ffff88001989bea8 ffffffff82e42639
 Call Trace:
 [<ffffffff82e3a249>] udp_disconnect+0x1f9/0x290
 [<ffffffff82e42639>] inet_dgram_connect+0x29/0x80
 [<ffffffff82d012fc>] sys_connect+0x9c/0x100

Reported-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c  | 24 ++++++++++++++++++++++--
 net/l2tp/l2tp_ip6.c | 18 +++++++++++++++++-
 2 files changed, 39 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 889f5d13d7ba..70614e7affab 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -239,9 +239,16 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr;
-	int ret = -EINVAL;
+	int ret;
 	int chk_addr_ret;
 
+	if (!sock_flag(sk, SOCK_ZAPPED))
+		return -EINVAL;
+	if (addr_len < sizeof(struct sockaddr_l2tpip))
+		return -EINVAL;
+	if (addr->l2tp_family != AF_INET)
+		return -EINVAL;
+
 	ret = -EADDRINUSE;
 	read_lock_bh(&l2tp_ip_lock);
 	if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id))
@@ -272,6 +279,8 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk_del_node_init(sk);
 	write_unlock_bh(&l2tp_ip_lock);
 	ret = 0;
+	sock_reset_flag(sk, SOCK_ZAPPED);
+
 out:
 	release_sock(sk);
 
@@ -288,6 +297,9 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
 	int rc;
 
+	if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */
+		return -EINVAL;
+
 	if (addr_len < sizeof(*lsa))
 		return -EINVAL;
 
@@ -311,6 +323,14 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	return rc;
 }
 
+static int l2tp_ip_disconnect(struct sock *sk, int flags)
+{
+	if (sock_flag(sk, SOCK_ZAPPED))
+		return 0;
+
+	return udp_disconnect(sk, flags);
+}
+
 static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
 			   int *uaddr_len, int peer)
 {
@@ -530,7 +550,7 @@ static struct proto l2tp_ip_prot = {
 	.close		   = l2tp_ip_close,
 	.bind		   = l2tp_ip_bind,
 	.connect	   = l2tp_ip_connect,
-	.disconnect	   = udp_disconnect,
+	.disconnect	   = l2tp_ip_disconnect,
 	.ioctl		   = udp_ioctl,
 	.destroy	   = l2tp_ip_destroy_sock,
 	.setsockopt	   = ip_setsockopt,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 0291d8d85f30..35e1e4bde587 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -258,6 +258,10 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	int addr_type;
 	int err;
 
+	if (!sock_flag(sk, SOCK_ZAPPED))
+		return -EINVAL;
+	if (addr->l2tp_family != AF_INET6)
+		return -EINVAL;
 	if (addr_len < sizeof(*addr))
 		return -EINVAL;
 
@@ -331,6 +335,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk_del_node_init(sk);
 	write_unlock_bh(&l2tp_ip6_lock);
 
+	sock_reset_flag(sk, SOCK_ZAPPED);
 	release_sock(sk);
 	return 0;
 
@@ -354,6 +359,9 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
 	int	addr_type;
 	int rc;
 
+	if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */
+		return -EINVAL;
+
 	if (addr_len < sizeof(*lsa))
 		return -EINVAL;
 
@@ -383,6 +391,14 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
 	return rc;
 }
 
+static int l2tp_ip6_disconnect(struct sock *sk, int flags)
+{
+	if (sock_flag(sk, SOCK_ZAPPED))
+		return 0;
+
+	return udp_disconnect(sk, flags);
+}
+
 static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
 			    int *uaddr_len, int peer)
 {
@@ -689,7 +705,7 @@ static struct proto l2tp_ip6_prot = {
 	.close		   = l2tp_ip6_close,
 	.bind		   = l2tp_ip6_bind,
 	.connect	   = l2tp_ip6_connect,
-	.disconnect	   = udp_disconnect,
+	.disconnect	   = l2tp_ip6_disconnect,
 	.ioctl		   = udp_ioctl,
 	.destroy	   = l2tp_ip6_destroy_sock,
 	.setsockopt	   = ipv6_setsockopt,
-- 
cgit v1.2.3-55-g7522


From a0c6ffbcfe600606b2d913dded4dc6b37b3bbbfd Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo
Date: Mon, 28 May 2012 08:52:05 +0000
Subject: rds_rdma: don't assume infiniband device is PCI

RDS code assumes that the struct ib_device dma_device member, which is a
pointer, points to a struct device embedded in a struct pci_dev.

This is not the case for ehca, for example, which is an OF driver, and
makes dma_device point to a struct device embedded in a struct
platform_device.

This will make the system crash when rds_rdma is loaded in a system
with ehca, since it will try to access the bus member of a non-existent
struct pci_dev.

The only reason rds_rdma uses the struct pci_dev is to get the NUMA node
the device is attached to. Using dev_to_node for that is much better,
since it won't assume which bus the infiniband is attached to.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@linux.vnet.ibm.com>
Cc: dledford@redhat.com
Cc: Jes.Sorensen@redhat.com
Cc: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
Acked-by: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rds/ib.h b/net/rds/ib.h
index edfaaaf164eb..8d2b3d5a7c21 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -186,8 +186,7 @@ struct rds_ib_device {
 	struct work_struct	free_work;
 };
 
-#define pcidev_to_node(pcidev) pcibus_to_node(pcidev->bus)
-#define ibdev_to_node(ibdev) pcidev_to_node(to_pci_dev(ibdev->dma_device))
+#define ibdev_to_node(ibdev) dev_to_node(ibdev->dma_device)
 #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
 
 /* bits for i_ack_flags */
-- 
cgit v1.2.3-55-g7522


From e9412c37082b5c932e83364aaed0c38c2ce33acb Mon Sep 17 00:00:00 2001
From: Neil Horman
Date: Tue, 29 May 2012 09:30:41 +0000
Subject: genetlink: Build a generic netlink family module alias

Generic netlink searches for -type- formatted aliases when requesting a module to
fulfill a protocol request (i.e. net-pf-16-proto-16-type-<x>, where x is a type
value).  However generic netlink protocols have no well defined type numbers,
they have string names.  Modify genl_ctrl_getfamily to request an alias in the
format net-pf-16-proto-16-family-<x> instead, where x is a generic string, and
add a macro that builds on the previously added MODULE_ALIAS_NET_PF_PROTO_NAME
macro to allow modules to specify those generic strings.

Note, l2tp previously hacked together a net-pf-16-proto-16-type-l2tp alias
using the MODULE_ALIAS macro, with these updates we can convert that to use the
PROTO_NAME macro.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: James Chapman <jchapman@katalix.com>
CC: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/genetlink.h | 3 +++
 net/l2tp/l2tp_netlink.c   | 3 +--
 net/netlink/genetlink.c   | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index 73c28dea10ae..7a114016ac7d 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -110,6 +110,9 @@ extern int lockdep_genl_is_held(void);
 #define genl_dereference(p)					\
 	rcu_dereference_protected(p, lockdep_genl_is_held())
 
+#define MODULE_ALIAS_GENL_FAMILY(family)\
+ MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family)
+
 #endif /* __KERNEL__ */
 
 #endif	/* __LINUX_GENERIC_NETLINK_H */
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 8577264378fe..ddc553e76671 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -923,5 +923,4 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
 MODULE_DESCRIPTION("L2TP netlink");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("1.0");
-MODULE_ALIAS("net-pf-" __stringify(PF_NETLINK) "-proto-" \
-	     __stringify(NETLINK_GENERIC) "-type-" "l2tp");
+MODULE_ALIAS_GENL_FAMILY("l2tp");
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 8340ace837f2..2cc7c1ee7690 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -836,7 +836,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
 #ifdef CONFIG_MODULES
 		if (res == NULL) {
 			genl_unlock();
-			request_module("net-pf-%d-proto-%d-type-%s",
+			request_module("net-pf-%d-proto-%d-family-%s",
 				       PF_NETLINK, NETLINK_GENERIC, name);
 			genl_lock();
 			res = genl_family_find_byname(name);
-- 
cgit v1.2.3-55-g7522


From 3fdcbd453152329002f12dfda0be90b714458164 Mon Sep 17 00:00:00 2001
From: Neil Horman
Date: Tue, 29 May 2012 09:30:42 +0000
Subject: drop_monitor: Add module alias to enable automatic module loading

Now that we have module alias macros for generic netlink families, let's use
those to mark modules with the appropriate family names for loading.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/drop_monitor.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 3252e7e0a005..ea5fb9fcc3f5 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -468,3 +468,4 @@ module_exit(exit_net_drop_monitor);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>");
+MODULE_ALIAS_GENL_FAMILY("NET_DM");
-- 
cgit v1.2.3-55-g7522


From b3b02ae5865c2dcd506322e0fc6def59a042e72f Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Thu, 31 May 2012 15:26:38 -0400
Subject: NFSv4.1: Fix a request leak on the back channel

If the call to svc_process_common() fails, then the request
needs to be freed before we can exit bc_svc_process.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@vger.kernel.org
---
 net/sunrpc/svc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 017c0117d154..074df5a564db 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1377,7 +1377,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
 						sizeof(req->rq_snd_buf));
 		return bc_send(req);
 	} else {
-		/* Nothing to do to drop request */
+		/* drop request */
+		xprt_free_bc_request(req);
 		return 0;
 	}
 }
-- 
cgit v1.2.3-55-g7522


From cc9b17ad29ecaa20bfe426a8d4dbfb94b13ff1cc Mon Sep 17 00:00:00 2001
From: Jason Wang
Date: Wed, 30 May 2012 21:18:10 +0000
Subject: net: sock: validate data_len before allocating skb in
 sock_alloc_send_pskb()

We need to validate the number of pages consumed by data_len, otherwise the
frags array could be overflowed by userspace. So this patch validates data_len
and returns -EMSGSIZE when data_len would occupy more frags than MAX_SKB_FRAGS.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 653f8c0aedc5..9e5b71fda6ec 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1592,6 +1592,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
 	gfp_t gfp_mask;
 	long timeo;
 	int err;
+	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+
+	err = -EMSGSIZE;
+	if (npages > MAX_SKB_FRAGS)
+		goto failure;
 
 	gfp_mask = sk->sk_allocation;
 	if (gfp_mask & __GFP_WAIT)
@@ -1610,14 +1615,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
 			skb = alloc_skb(header_len, gfp_mask);
 			if (skb) {
-				int npages;
 				int i;
 
 				/* No pages, we're done... */
 				if (!data_len)
 					break;
 
-				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 				skb->truesize += data_len;
 				skb_shinfo(skb)->nr_frags = npages;
 				for (i = 0; i < npages; i++) {
-- 
cgit v1.2.3-55-g7522


From c52226daf553b21891f39777d78a54ea4e7e8654 Mon Sep 17 00:00:00 2001
From: J. Bruce Fields
Date: Wed, 11 Apr 2012 20:08:45 -0400
Subject: rpc: handle rotated gss data for Windows interoperability

The data in Kerberos gss tokens can be rotated.  But we were lazy and
rejected any nonzero rotation value.  It wasn't necessary for the
implementations we were testing against at the time.

But it appears that Windows does use a nonzero value here.

So, implement rotation to bring ourselves into compliance with the spec
and to interoperate with Windows.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/auth_gss/gss_krb5_wrap.c | 61 +++++++++++++++++++++++++++----------
 1 file changed, 45 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 38f388c39dce..107c4528654f 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -381,21 +381,53 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 }
 
 /*
- * We cannot currently handle tokens with rotated data.  We need a
- * generalized routine to rotate the data in place.  It is anticipated
- * that we won't encounter rotated data in the general case.
+ * We can shift data by up to LOCAL_BUF_LEN bytes in a pass.  If we need
+ * to do more than that, we shift repeatedly.  Kevin Coffman reports
+ * seeing 28 bytes as the value used by Microsoft clients and servers
+ * with AES, so this constant is chosen to allow handling 28 in one pass
+ * without using too much stack space.
+ *
+ * If that proves to be a problem perhaps we could use a more clever
+ * algorithm.
  */
-static u32
-rotate_left(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, u16 rrc)
+#define LOCAL_BUF_LEN 32u
+
+static void rotate_buf_a_little(struct xdr_buf *buf, unsigned int shift)
 {
-	unsigned int realrrc = rrc % (buf->len - offset - GSS_KRB5_TOK_HDR_LEN);
+	char head[LOCAL_BUF_LEN];
+	char tmp[LOCAL_BUF_LEN];
+	unsigned int this_len, i;
+
+	BUG_ON(shift > LOCAL_BUF_LEN);
 
-	if (realrrc == 0)
-		return 0;
+	read_bytes_from_xdr_buf(buf, 0, head, shift);
+	for (i = 0; i + shift < buf->len; i += LOCAL_BUF_LEN) {
+		this_len = min(LOCAL_BUF_LEN, buf->len - (i + shift));
+		read_bytes_from_xdr_buf(buf, i+shift, tmp, this_len);
+		write_bytes_to_xdr_buf(buf, i, tmp, this_len);
+	}
+	write_bytes_to_xdr_buf(buf, buf->len - shift, head, shift);
+}
 
-	dprintk("%s: cannot process token with rotated data: "
-		"rrc %u, realrrc %u\n", __func__, rrc, realrrc);
-	return 1;
+static void _rotate_left(struct xdr_buf *buf, unsigned int shift)
+{
+	int shifted = 0;
+	int this_shift;
+
+	shift %= buf->len;
+	while (shifted < shift) {
+		this_shift = min(shift - shifted, LOCAL_BUF_LEN);
+		rotate_buf_a_little(buf, this_shift);
+		shifted += this_shift;
+	}
+}
+
+static void rotate_left(u32 base, struct xdr_buf *buf, unsigned int shift)
+{
+	struct xdr_buf subbuf;
+
+	xdr_buf_subsegment(buf, &subbuf, base, buf->len - base);
+	_rotate_left(&subbuf, shift);
 }
 
 static u32
@@ -495,11 +527,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 
 	seqnum = be64_to_cpup((__be64 *)(ptr + 8));
 
-	if (rrc != 0) {
-		err = rotate_left(kctx, offset, buf, rrc);
-		if (err)
-			return GSS_S_FAILURE;
-	}
+	if (rrc != 0)
+		rotate_left(offset + 16, buf, rrc);
 
 	err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf,
 					&headskip, &tailskip);
-- 
cgit v1.2.3-55-g7522


From 9793f7c88937e7ac07305ab1af1a519225836823 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky
Date: Wed, 2 May 2012 16:08:38 +0400
Subject: SUNRPC: new svc_bind() routine introduced

This new routine is responsible for service registration in a specified
network context.

The idea is to separate service creation from per-net operations.

Note also: since registering service with svc_bind() can fail, the
service will be destroyed and during destruction it will try to
unregister itself from rpcbind. In this case unregistration has to be
skipped.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/svc.c             |  6 ++++++
 fs/nfs/callback.c          |  8 ++++++++
 fs/nfsd/nfssvc.c           |  9 +++++++++
 include/linux/sunrpc/svc.h |  1 +
 net/sunrpc/rpcb_clnt.c     | 12 +++++++-----
 net/sunrpc/svc.c           | 19 ++++++++++---------
 6 files changed, 41 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1ead0750cdbb..b7e92ed56885 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -324,6 +324,12 @@ int lockd_up(struct net *net)
 		goto out;
 	}
 
+	error = svc_bind(serv, net);
+	if (error < 0) {
+		printk(KERN_WARNING "lockd_up: bind service failed\n");
+		goto destroy_and_out;
+	}
+
 	error = make_socks(serv, net);
 	if (error < 0)
 		goto destroy_and_out;
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index eb95f5091c1a..26b38fb8102e 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -17,6 +17,7 @@
 #include <linux/kthread.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/bc_xprt.h>
+#include <linux/nsproxy.h>
 
 #include <net/inet_sock.h>
 
@@ -253,6 +254,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
 	char svc_name[12];
 	int ret = 0;
 	int minorversion_setup;
+	struct net *net = current->nsproxy->net_ns;
 
 	mutex_lock(&nfs_callback_mutex);
 	if (cb_info->users++ || cb_info->task != NULL) {
@@ -265,6 +267,12 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
 		goto out_err;
 	}
 
+	ret = svc_bind(serv, net);
+	if (ret < 0) {
+		printk(KERN_WARNING "NFS: bind callback service failed\n");
+		goto out_err;
+	}
+
 	minorversion_setup =  nfs_minorversion_callback_svc_setup(minorversion,
 					serv, xprt, &rqstp, &callback_svc);
 	if (!minorversion_setup) {
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index cb4d51d8cbdb..0762f3c9e0fb 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/fs_struct.h>
 #include <linux/swap.h>
+#include <linux/nsproxy.h>
 
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svcsock.h>
@@ -330,6 +331,8 @@ static int nfsd_get_default_max_blksize(void)
 
 int nfsd_create_serv(void)
 {
+	int error;
+
 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
 	if (nfsd_serv) {
 		svc_get(nfsd_serv);
@@ -343,6 +346,12 @@ int nfsd_create_serv(void)
 	if (nfsd_serv == NULL)
 		return -ENOMEM;
 
+	error = svc_bind(nfsd_serv, current->nsproxy->net_ns);
+	if (error < 0) {
+		svc_destroy(nfsd_serv);
+		return error;
+	}
+
 	set_max_drc();
 	do_gettimeofday(&nfssvc_boot);		/* record boot time */
 	return 0;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 51b29ac45a8e..2b43e0214261 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -416,6 +416,7 @@ struct svc_procedure {
  */
 int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
 void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
+int svc_bind(struct svc_serv *serv, struct net *net);
 struct svc_serv *svc_create(struct svc_program *, unsigned int,
 			    void (*shutdown)(struct svc_serv *, struct net *net));
 struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 78ac39fd9fe7..4c38b33ab8a8 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -180,14 +180,16 @@ void rpcb_put_local(struct net *net)
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 	struct rpc_clnt *clnt = sn->rpcb_local_clnt;
 	struct rpc_clnt *clnt4 = sn->rpcb_local_clnt4;
-	int shutdown;
+	int shutdown = 0;
 
 	spin_lock(&sn->rpcb_clnt_lock);
-	if (--sn->rpcb_users == 0) {
-		sn->rpcb_local_clnt = NULL;
-		sn->rpcb_local_clnt4 = NULL;
+	if (sn->rpcb_users) {
+		if (--sn->rpcb_users == 0) {
+			sn->rpcb_local_clnt = NULL;
+			sn->rpcb_local_clnt4 = NULL;
+		}
+		shutdown = !sn->rpcb_users;
 	}
-	shutdown = !sn->rpcb_users;
 	spin_unlock(&sn->rpcb_clnt_lock);
 
 	if (shutdown) {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4153846984ac..e6d542cee0f3 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -407,6 +407,14 @@ static int svc_uses_rpcbind(struct svc_serv *serv)
 	return 0;
 }
 
+int svc_bind(struct svc_serv *serv, struct net *net)
+{
+	if (!svc_uses_rpcbind(serv))
+		return 0;
+	return svc_rpcb_setup(serv, net);
+}
+EXPORT_SYMBOL_GPL(svc_bind);
+
 /*
  * Create an RPC service
  */
@@ -471,15 +479,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 		spin_lock_init(&pool->sp_lock);
 	}
 
-	if (svc_uses_rpcbind(serv)) {
-		if (svc_rpcb_setup(serv, current->nsproxy->net_ns) < 0) {
-			kfree(serv->sv_pools);
-			kfree(serv);
-			return NULL;
-		}
-		if (!serv->sv_shutdown)
-			serv->sv_shutdown = svc_rpcb_cleanup;
-	}
+	if (svc_uses_rpcbind(serv) && (!serv->sv_shutdown))
+		serv->sv_shutdown = svc_rpcb_cleanup;
 
 	return serv;
 }
-- 
cgit v1.2.3-55-g7522


From 786185b5f8abefa6a8a16695bb4a59c164d5a071 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky
Date: Fri, 4 May 2012 12:49:41 +0400
Subject: SUNRPC: move per-net operations from svc_destroy()

The idea is to separate service destruction and per-net operations,
because these are two different things and the mix looks ugly.

Notes:

1) For the NFS server this patch looks ugly (sorry for that). But these
places will be rewritten soon during NFSd containerization.

2) The LockD per-net counter increase in lockd_up() was moved prior to
make_socks() to make the lockd_down_net() call safe in case of error.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/svc.c    | 27 +++++++++++++++------------
 fs/nfs/callback.c |  3 +++
 fs/nfsd/nfsctl.c  | 12 +++++++++---
 fs/nfsd/nfssvc.c  | 14 ++++++++++++++
 net/sunrpc/svc.c  |  4 ----
 5 files changed, 41 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b7e92ed56885..3250f280a171 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -257,7 +257,7 @@ static int lockd_up_net(struct net *net)
 	struct svc_serv *serv = nlmsvc_rqst->rq_server;
 	int error;
 
-	if (ln->nlmsvc_users)
+	if (ln->nlmsvc_users++)
 		return 0;
 
 	error = svc_rpcb_setup(serv, net);
@@ -272,6 +272,7 @@ static int lockd_up_net(struct net *net)
 err_socks:
 	svc_rpcb_cleanup(serv, net);
 err_rpcb:
+	ln->nlmsvc_users--;
 	return error;
 }
 
@@ -299,6 +300,7 @@ int lockd_up(struct net *net)
 {
 	struct svc_serv *serv;
 	int		error = 0;
+	struct lockd_net *ln = net_generic(net, lockd_net_id);
 
 	mutex_lock(&nlmsvc_mutex);
 	/*
@@ -330,9 +332,11 @@ int lockd_up(struct net *net)
 		goto destroy_and_out;
 	}
 
+	ln->nlmsvc_users++;
+
 	error = make_socks(serv, net);
 	if (error < 0)
-		goto destroy_and_out;
+		goto err_start;
 
 	/*
 	 * Create the kernel thread and wait for it to start.
@@ -344,7 +348,7 @@ int lockd_up(struct net *net)
 		printk(KERN_WARNING
 			"lockd_up: svc_rqst allocation failed, error=%d\n",
 			error);
-		goto destroy_and_out;
+		goto err_start;
 	}
 
 	svc_sock_update_bufs(serv);
@@ -358,7 +362,7 @@ int lockd_up(struct net *net)
 		nlmsvc_rqst = NULL;
 		printk(KERN_WARNING
 			"lockd_up: kthread_run failed, error=%d\n", error);
-		goto destroy_and_out;
+		goto err_start;
 	}
 
 	/*
@@ -368,14 +372,14 @@ int lockd_up(struct net *net)
 destroy_and_out:
 	svc_destroy(serv);
 out:
-	if (!error) {
-		struct lockd_net *ln = net_generic(net, lockd_net_id);
-
-		ln->nlmsvc_users++;
+	if (!error)
 		nlmsvc_users++;
-	}
 	mutex_unlock(&nlmsvc_mutex);
 	return error;
+
+err_start:
+	lockd_down_net(net);
+	goto destroy_and_out;
 }
 EXPORT_SYMBOL_GPL(lockd_up);
 
@@ -386,11 +390,10 @@ void
 lockd_down(struct net *net)
 {
 	mutex_lock(&nlmsvc_mutex);
+	lockd_down_net(net);
 	if (nlmsvc_users) {
-		if (--nlmsvc_users) {
-			lockd_down_net(net);
+		if (--nlmsvc_users)
 			goto out;
-		}
 	} else {
 		printk(KERN_ERR "lockd_down: no users! task=%p\n",
 			nlmsvc_task);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 26b38fb8102e..cff39406f965 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -314,6 +314,8 @@ out_err:
 	dprintk("NFS: Couldn't create callback socket or server thread; "
 		"err = %d\n", ret);
 	cb_info->users--;
+	if (serv)
+		svc_shutdown_net(serv, net);
 	goto out;
 }
 
@@ -328,6 +330,7 @@ void nfs_callback_down(int minorversion)
 	cb_info->users--;
 	if (cb_info->users == 0 && cb_info->task != NULL) {
 		kthread_stop(cb_info->task);
+		svc_shutdown_net(cb_info->serv, current->nsproxy->net_ns);
 		svc_exit_thread(cb_info->rqst);
 		cb_info->serv = NULL;
 		cb_info->rqst = NULL;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 72699885ac48..c55298ed5772 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -661,6 +661,7 @@ static ssize_t __write_ports_addfd(char *buf)
 {
 	char *mesg = buf;
 	int fd, err;
+	struct net *net = &init_net;
 
 	err = get_int(&mesg, &fd);
 	if (err != 0 || fd < 0)
@@ -672,6 +673,8 @@ static ssize_t __write_ports_addfd(char *buf)
 
 	err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
 	if (err < 0) {
+		if (nfsd_serv->sv_nrthreads == 1)
+			svc_shutdown_net(nfsd_serv, net);
 		svc_destroy(nfsd_serv);
 		return err;
 	}
@@ -709,6 +712,7 @@ static ssize_t __write_ports_addxprt(char *buf)
 	char transport[16];
 	struct svc_xprt *xprt;
 	int port, err;
+	struct net *net = &init_net;
 
 	if (sscanf(buf, "%15s %4u", transport, &port) != 2)
 		return -EINVAL;
@@ -720,12 +724,12 @@ static ssize_t __write_ports_addxprt(char *buf)
 	if (err != 0)
 		return err;
 
-	err = svc_create_xprt(nfsd_serv, transport, &init_net,
+	err = svc_create_xprt(nfsd_serv, transport, net,
 				PF_INET, port, SVC_SOCK_ANONYMOUS);
 	if (err < 0)
 		goto out_err;
 
-	err = svc_create_xprt(nfsd_serv, transport, &init_net,
+	err = svc_create_xprt(nfsd_serv, transport, net,
 				PF_INET6, port, SVC_SOCK_ANONYMOUS);
 	if (err < 0 && err != -EAFNOSUPPORT)
 		goto out_close;
@@ -734,12 +738,14 @@ static ssize_t __write_ports_addxprt(char *buf)
 	nfsd_serv->sv_nrthreads--;
 	return 0;
 out_close:
-	xprt = svc_find_xprt(nfsd_serv, transport, &init_net, PF_INET, port);
+	xprt = svc_find_xprt(nfsd_serv, transport, net, PF_INET, port);
 	if (xprt != NULL) {
 		svc_close_xprt(xprt);
 		svc_xprt_put(xprt);
 	}
 out_err:
+	if (nfsd_serv->sv_nrthreads == 1)
+		svc_shutdown_net(nfsd_serv, net);
 	svc_destroy(nfsd_serv);
 	return err;
 }
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 0762f3c9e0fb..ee709fc8f58b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -382,6 +382,7 @@ int nfsd_set_nrthreads(int n, int *nthreads)
 	int i = 0;
 	int tot = 0;
 	int err = 0;
+	struct net *net = &init_net;
 
 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
 
@@ -426,6 +427,9 @@ int nfsd_set_nrthreads(int n, int *nthreads)
 		if (err)
 			break;
 	}
+
+	if (nfsd_serv->sv_nrthreads == 1)
+		svc_shutdown_net(nfsd_serv, net);
 	svc_destroy(nfsd_serv);
 
 	return err;
@@ -441,6 +445,7 @@ nfsd_svc(unsigned short port, int nrservs)
 {
 	int	error;
 	bool	nfsd_up_before;
+	struct net *net = &init_net;
 
 	mutex_lock(&nfsd_mutex);
 	dprintk("nfsd: creating service\n");
@@ -473,6 +478,8 @@ out_shutdown:
 	if (error < 0 && !nfsd_up_before)
 		nfsd_shutdown();
 out_destroy:
+	if (nfsd_serv->sv_nrthreads == 1)
+		svc_shutdown_net(nfsd_serv, net);
 	svc_destroy(nfsd_serv);		/* Release server */
 out:
 	mutex_unlock(&nfsd_mutex);
@@ -556,6 +563,9 @@ nfsd(void *vrqstp)
 	nfsdstats.th_cnt --;
 
 out:
+	if (rqstp->rq_server->sv_nrthreads == 1)
+		svc_shutdown_net(rqstp->rq_server, &init_net);
+
 	/* Release the thread */
 	svc_exit_thread(rqstp);
 
@@ -668,8 +678,12 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
 int nfsd_pool_stats_release(struct inode *inode, struct file *file)
 {
 	int ret = seq_release(inode, file);
+	struct net *net = &init_net;
+
 	mutex_lock(&nfsd_mutex);
 	/* this function really, really should have been called svc_put() */
+	if (nfsd_serv->sv_nrthreads == 1)
+		svc_shutdown_net(nfsd_serv, net);
 	svc_destroy(nfsd_serv);
 	mutex_unlock(&nfsd_mutex);
 	return ret;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e6d542cee0f3..b7210f5cc893 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -537,8 +537,6 @@ EXPORT_SYMBOL_GPL(svc_shutdown_net);
 void
 svc_destroy(struct svc_serv *serv)
 {
-	struct net *net = current->nsproxy->net_ns;
-
 	dprintk("svc: svc_destroy(%s, %d)\n",
 				serv->sv_program->pg_name,
 				serv->sv_nrthreads);
@@ -553,8 +551,6 @@ svc_destroy(struct svc_serv *serv)
 
 	del_timer_sync(&serv->sv_temptimer);
 
-	svc_shutdown_net(serv, net);
-
 	/*
 	 * The last user is gone and thus all sockets have to be destroyed to
 	 * the point. Check this.
-- 
cgit v1.2.3-55-g7522


From 91c427ac3a61ccabae0fdef53563edf40394b6c9 Mon Sep 17 00:00:00 2001
From: Jeff Layton
Date: Fri, 4 May 2012 11:44:12 -0400
Subject: sunrpc: do array overrun check in svc_recv before allocating pages

There's little point in waiting until after we allocate all of the pages
to see if we're going to overrun the array. In the event that this
calculation is really off we could end up scribbling over a bunch of
memory and make it tougher to debug.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc_xprt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 4bda09d7e1a4..8195c6acba12 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -601,6 +601,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 
 	/* now allocate needed pages.  If we get a failure, sleep briefly */
 	pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
+	BUG_ON(pages >= RPCSVC_MAXPAGES);
 	for (i = 0; i < pages ; i++)
 		while (rqstp->rq_pages[i] == NULL) {
 			struct page *p = alloc_page(GFP_KERNEL);
@@ -615,7 +616,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 			rqstp->rq_pages[i] = p;
 		}
 	rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
-	BUG_ON(pages >= RPCSVC_MAXPAGES);
 
 	/* Make arg->head point to first page and arg->pages point to rest */
 	arg = &rqstp->rq_arg;
-- 
cgit v1.2.3-55-g7522


From 3ddbe8794ff1bcba5af09f2e6949755d6251958f Mon Sep 17 00:00:00 2001
From: J. Bruce Fields
Date: Wed, 16 May 2012 17:14:14 -0400
Subject: svcrpc: fix a comment typo

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svc_xprt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 8195c6acba12..37a1f664d108 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -976,7 +976,7 @@ void svc_close_net(struct svc_serv *serv, struct net *net)
 	svc_clear_pools(serv, net);
 	/*
 	 * At this point the sp_sockets lists will stay empty, since
-	 * svc_enqueue will not add new entries without taking the
+	 * svc_xprt_enqueue will not add new entries without taking the
 	 * sp_lock and checking XPT_BUSY.
 	 */
 	svc_clear_list(&serv->sv_tempsocks, net);
-- 
cgit v1.2.3-55-g7522


From 03a4e1f6ddf25f48848e1bddcffc0ad489648331 Mon Sep 17 00:00:00 2001
From: J. Bruce Fields
Date: Mon, 14 May 2012 19:55:22 -0400
Subject: nfsd4: move principal name into svc_cred

Instead of keeping the principal name associated with a request in a
structure that's private to auth_gss and using an accessor function,
move it to svc_cred.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfs/callback.c                  |  2 +-
 fs/nfsd/nfs4callback.c             |  5 +++--
 fs/nfsd/nfs4state.c                | 34 +++++++++++++++++-----------------
 fs/nfsd/state.h                    |  1 -
 include/linux/sunrpc/svcauth.h     |  9 +++++++++
 include/linux/sunrpc/svcauth_gss.h |  1 -
 net/sunrpc/auth_gss/svcauth_gss.c  | 25 ++++++-------------------
 net/sunrpc/svcauth_unix.c          |  2 ++
 8 files changed, 38 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index cff39406f965..970659daa323 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -343,7 +343,7 @@ void nfs_callback_down(int minorversion)
 int
 check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
 {
-	char *p = svc_gss_principal(rqstp);
+	char *p = rqstp->rq_cred.cr_principal;
 
 	if (rqstp->rq_authop->flavour != RPC_AUTH_GSS)
 		return 1;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c8e9f637153a..a5fd6b982f27 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -650,9 +650,10 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 	struct rpc_clnt *client;
 
 	if (clp->cl_minorversion == 0) {
-		if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
+		if (!clp->cl_cred.cr_principal &&
+				(clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
 			return -EINVAL;
-		args.client_name = clp->cl_principal;
+		args.client_name = clp->cl_cred.cr_principal;
 		args.prognumber	= conn->cb_prog,
 		args.protocol = XPRT_TRANSPORT_TCP;
 		args.authflavor = clp->cl_flavor;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 5415550a63a9..37bafb290c11 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1087,9 +1087,7 @@ free_client(struct nfs4_client *clp)
 		list_del(&ses->se_perclnt);
 		nfsd4_put_session_locked(ses);
 	}
-	if (clp->cl_cred.cr_group_info)
-		put_group_info(clp->cl_cred.cr_group_info);
-	kfree(clp->cl_principal);
+	free_svc_cred(&clp->cl_cred);
 	kfree(clp->cl_name.data);
 	kfree(clp);
 }
@@ -1170,12 +1168,20 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
 	target->cl_clientid.cl_id = source->cl_clientid.cl_id; 
 }
 
-static void copy_cred(struct svc_cred *target, struct svc_cred *source)
+static int copy_cred(struct svc_cred *target, struct svc_cred *source)
 {
+	if (source->cr_principal) {
+		target->cr_principal =
+				kstrdup(source->cr_principal, GFP_KERNEL);
+		if (target->cr_principal == NULL)
+			return -ENOMEM;
+	} else
+		target->cr_principal = NULL;
 	target->cr_uid = source->cr_uid;
 	target->cr_gid = source->cr_gid;
 	target->cr_group_info = source->cr_group_info;
 	get_group_info(target->cr_group_info);
+	return 0;
 }
 
 static int same_name(const char *n1, const char *n2)
@@ -1242,25 +1248,20 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 {
 	struct nfs4_client *clp;
 	struct sockaddr *sa = svc_addr(rqstp);
-	char *princ;
+	int ret;
 
 	clp = alloc_client(name);
 	if (clp == NULL)
 		return NULL;
 
 	INIT_LIST_HEAD(&clp->cl_sessions);
-
-	princ = svc_gss_principal(rqstp);
-	if (princ) {
-		clp->cl_principal = kstrdup(princ, GFP_KERNEL);
-		if (clp->cl_principal == NULL) {
-			spin_lock(&client_lock);
-			free_client(clp);
-			spin_unlock(&client_lock);
-			return NULL;
-		}
+	ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
+	if (ret) {
+		spin_lock(&client_lock);
+		free_client(clp);
+		spin_unlock(&client_lock);
+		return NULL;
 	}
-
 	idr_init(&clp->cl_stateids);
 	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
 	atomic_set(&clp->cl_refcount, 0);
@@ -1279,7 +1280,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 	copy_verf(clp, verf);
 	rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
 	clp->cl_flavor = rqstp->rq_flavor;
-	copy_cred(&clp->cl_cred, &rqstp->rq_cred);
 	gen_confirm(clp);
 	clp->cl_cb_session = NULL;
 	return clp;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 89ab137d379a..849091e16ea6 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -232,7 +232,6 @@ struct nfs4_client {
 	time_t                  cl_time;        /* time of last lease renewal */
 	struct sockaddr_storage	cl_addr; 	/* client ipaddress */
 	u32			cl_flavor;	/* setclientid pseudoflavor */
-	char			*cl_principal;	/* setclientid principal name */
 	struct svc_cred		cl_cred; 	/* setclientid principal */
 	clientid_t		cl_clientid;	/* generated by server */
 	nfs4_verifier		cl_confirm;	/* generated by server */
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 2c54683b91de..16fe477a96e0 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -15,13 +15,22 @@
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/cache.h>
 #include <linux/hash.h>
+#include <linux/cred.h>
 
 struct svc_cred {
 	uid_t			cr_uid;
 	gid_t			cr_gid;
 	struct group_info	*cr_group_info;
+	char			*cr_principal; /* for gss */
 };
 
+static inline void free_svc_cred(struct svc_cred *cred)
+{
+	if (cred->cr_group_info)
+		put_group_info(cred->cr_group_info);
+	kfree(cred->cr_principal);
+}
+
 struct svc_rqst;		/* forward decl */
 struct in6_addr;
 
diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h
index 7c32daa025eb..726aff1a5201 100644
--- a/include/linux/sunrpc/svcauth_gss.h
+++ b/include/linux/sunrpc/svcauth_gss.h
@@ -22,7 +22,6 @@ int gss_svc_init_net(struct net *net);
 void gss_svc_shutdown_net(struct net *net);
 int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name);
 u32 svcauth_gss_flavor(struct auth_domain *dom);
-char *svc_gss_principal(struct svc_rqst *);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index f0a0cd4470b7..d091d7d09bea 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -335,7 +335,6 @@ struct rsc {
 	struct svc_cred		cred;
 	struct gss_svc_seq_data	seqdata;
 	struct gss_ctx		*mechctx;
-	char			*client_name;
 };
 
 static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old);
@@ -346,9 +345,7 @@ static void rsc_free(struct rsc *rsci)
 	kfree(rsci->handle.data);
 	if (rsci->mechctx)
 		gss_delete_sec_context(&rsci->mechctx);
-	if (rsci->cred.cr_group_info)
-		put_group_info(rsci->cred.cr_group_info);
-	kfree(rsci->client_name);
+	free_svc_cred(&rsci->cred);
 }
 
 static void rsc_put(struct kref *ref)
@@ -386,7 +383,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp)
 	tmp->handle.data = NULL;
 	new->mechctx = NULL;
 	new->cred.cr_group_info = NULL;
-	new->client_name = NULL;
+	new->cred.cr_principal = NULL;
 }
 
 static void
@@ -401,8 +398,8 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp)
 	spin_lock_init(&new->seqdata.sd_lock);
 	new->cred = tmp->cred;
 	tmp->cred.cr_group_info = NULL;
-	new->client_name = tmp->client_name;
-	tmp->client_name = NULL;
+	new->cred.cr_principal = tmp->cred.cr_principal;
+	tmp->cred.cr_principal = NULL;
 }
 
 static struct cache_head *
@@ -496,8 +493,8 @@ static int rsc_parse(struct cache_detail *cd,
 		/* get client name */
 		len = qword_get(&mesg, buf, mlen);
 		if (len > 0) {
-			rsci.client_name = kstrdup(buf, GFP_KERNEL);
-			if (!rsci.client_name)
+			rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL);
+			if (!rsci.cred.cr_principal)
 				goto out;
 		}
 
@@ -927,16 +924,6 @@ struct gss_svc_data {
 	struct rsc			*rsci;
 };
 
-char *svc_gss_principal(struct svc_rqst *rqstp)
-{
-	struct gss_svc_data *gd = (struct gss_svc_data *)rqstp->rq_auth_data;
-
-	if (gd && gd->rsci)
-		return gd->rsci->client_name;
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(svc_gss_principal);
-
 static int
 svcauth_gss_set_client(struct svc_rqst *rqstp)
 {
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 9c3b9f014468..12e4897d0bf3 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -740,6 +740,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
 	struct svc_cred	*cred = &rqstp->rq_cred;
 
 	cred->cr_group_info = NULL;
+	cred->cr_principal = NULL;
 	rqstp->rq_client = NULL;
 
 	if (argv->iov_len < 3*4)
@@ -805,6 +806,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
 	int		len   = argv->iov_len;
 
 	cred->cr_group_info = NULL;
+	cred->cr_principal = NULL;
 	rqstp->rq_client = NULL;
 
 	if ((len -= 3*4) < 0)
-- 
cgit v1.2.3-55-g7522


From d5497fc693a446ce9100fcf4117c3f795ddfd0d2 Mon Sep 17 00:00:00 2001
From: J. Bruce Fields
Date: Mon, 14 May 2012 22:06:49 -0400
Subject: nfsd4: move rq_flavor into svc_cred

Move the rq_flavor into struct svc_cred, and use it in setclientid and
exchange_id comparisons as well.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/auth.c                    | 2 +-
 fs/nfsd/export.c                  | 6 +++---
 fs/nfsd/nfs4idmap.c               | 4 ++--
 fs/nfsd/nfs4state.c               | 6 +++---
 include/linux/sunrpc/svc.h        | 1 -
 include/linux/sunrpc/svcauth.h    | 1 +
 net/sunrpc/auth_gss/svcauth_gss.c | 2 +-
 net/sunrpc/svcauth_unix.c         | 4 ++--
 8 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 79717a40daba..b42eaf3aac16 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -10,7 +10,7 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
 	struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
 
 	for (f = exp->ex_flavors; f < end; f++) {
-		if (f->pseudoflavor == rqstp->rq_flavor)
+		if (f->pseudoflavor == rqstp->rq_cred.cr_flavor)
 			return f->flags;
 	}
 	return exp->ex_flags;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index ec16364f782e..ba233499b9a5 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -904,13 +904,13 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
 		return 0;
 	/* ip-address based client; check sec= export option: */
 	for (f = exp->ex_flavors; f < end; f++) {
-		if (f->pseudoflavor == rqstp->rq_flavor)
+		if (f->pseudoflavor == rqstp->rq_cred.cr_flavor)
 			return 0;
 	}
 	/* defaults in absence of sec= options: */
 	if (exp->ex_nflavors == 0) {
-		if (rqstp->rq_flavor == RPC_AUTH_NULL ||
-		    rqstp->rq_flavor == RPC_AUTH_UNIX)
+		if (rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL ||
+		    rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)
 			return 0;
 	}
 	return nfserr_wrongsec;
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 286a7f8f2024..dae36f1dee95 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -605,7 +605,7 @@ numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namel
 static __be32
 do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id)
 {
-	if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS)
+	if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
 		if (numeric_name_to_id(rqstp, type, name, namelen, id))
 			return 0;
 		/*
@@ -618,7 +618,7 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u
 static int
 do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
 {
-	if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS)
+	if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
 		return sprintf(name, "%u", id);
 	return idmap_id_to_name(rqstp, type, id, name);
 }
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6dc0cfb37541..c743cdf51ebc 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1177,6 +1177,7 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source)
 			return -ENOMEM;
 	} else
 		target->cr_principal = NULL;
+	target->cr_flavor = source->cr_flavor;
 	target->cr_uid = source->cr_uid;
 	target->cr_gid = source->cr_gid;
 	target->cr_group_info = source->cr_group_info;
@@ -1213,11 +1214,11 @@ static bool groups_equal(struct group_info *g1, struct group_info *g2)
 	return true;
 }
 
-/* XXX what about NGROUP */
 static int
 same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
 {
-	if ((cr1->cr_uid != cr2->cr_uid)
+	if ((cr1->cr_flavor != cr2->cr_flavor)
+		|| (cr1->cr_uid != cr2->cr_uid)
 		|| (cr1->cr_gid != cr2->cr_gid)
 		|| !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
 		return false;
@@ -1299,7 +1300,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
 	copy_verf(clp, verf);
 	rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
-	clp->cl_flavor = rqstp->rq_flavor;
 	gen_confirm(clp);
 	clp->cl_cb_session = NULL;
 	return clp;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 2b43e0214261..40e0a273faea 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -232,7 +232,6 @@ struct svc_rqst {
 	struct svc_pool *	rq_pool;	/* thread pool */
 	struct svc_procedure *	rq_procinfo;	/* procedure info */
 	struct auth_ops *	rq_authop;	/* authentication flavour */
-	u32			rq_flavor;	/* pseudoflavor */
 	struct svc_cred		rq_cred;	/* auth info */
 	void *			rq_xprt_ctxt;	/* transport specific context ptr */
 	struct svc_deferred_req*rq_deferred;	/* deferred request we are replaying */
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 16fe477a96e0..dd74084a9799 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -21,6 +21,7 @@ struct svc_cred {
 	uid_t			cr_uid;
 	gid_t			cr_gid;
 	struct group_info	*cr_group_info;
+	u32			cr_flavor; /* pseudoflavor */
 	char			*cr_principal; /* for gss */
 };
 
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index d091d7d09bea..bcb773781ec0 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1202,7 +1202,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 		}
 		svcdata->rsci = rsci;
 		cache_get(&rsci->h);
-		rqstp->rq_flavor = gss_svc_to_pseudoflavor(
+		rqstp->rq_cred.cr_flavor = gss_svc_to_pseudoflavor(
 					rsci->mechctx->mech_type, gc->gc_svc);
 		ret = SVC_OK;
 		goto out;
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 12e4897d0bf3..88962cf34377 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -768,7 +768,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
 	svc_putnl(resv, RPC_AUTH_NULL);
 	svc_putnl(resv, 0);
 
-	rqstp->rq_flavor = RPC_AUTH_NULL;
+	rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL;
 	return SVC_OK;
 }
 
@@ -839,7 +839,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
 	svc_putnl(resv, RPC_AUTH_NULL);
 	svc_putnl(resv, 0);
 
-	rqstp->rq_flavor = RPC_AUTH_UNIX;
+	rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX;
 	return SVC_OK;
 
 badcred:
-- 
cgit v1.2.3-55-g7522


From d58367515f47371f7202d8b258ee0614a8955a6a Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Thu, 19 Apr 2012 18:17:15 -0400
Subject: sch_atm.c: get rid of pointless extern

sockfd_lookup() is declared in linux/net.h, which is pulled by
linux/skbuff.h (and needed for a lot of other stuff in sch_atm.c
anyway).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/sched/sch_atm.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 8522a4793374..ca8e0a57d945 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -16,8 +16,6 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
-extern struct socket *sockfd_lookup(int fd, int *err);	/* @@@ fix this */
-
 /*
  * The ATM queuing discipline provides a framework for invoking classifiers
  * (aka "filters"), which in turn select classes of this queuing discipline.
-- 
cgit v1.2.3-55-g7522


From 7433819a1eefd4e74711fffd6d54e30a644ef240 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 31 May 2012 21:00:26 +0000
Subject: tcp: do not create inetpeer on SYNACK message

Another problem during a SYNFLOOD/DDoS attack is the inetpeer cache growing
larger and larger, using lots of memory and CPU time.

tcp_v4_send_synack()
->inet_csk_route_req()
 ->ip_route_output_flow()
  ->rt_set_nexthop()
   ->rt_init_metrics()
    ->inet_getpeer( create = true)

This is a side effect of commit a4daad6b09230 (net: Pre-COW metrics for
TCP), added in 2.6.39.

Possible solution:

Instruct inet_csk_route_req() to clear FLOWI_FLAG_PRECOW_METRICS from the
flow flags it passes to the route lookup.

Before patch:

# grep peer /proc/slabinfo
inet_peer_cache   4175430 4175430    192   42    2 : tunables    0    0    0 : slabdata  99415  99415      0

Samples: 41K of event 'cycles', Event count (approx.): 30716565122
+  20,24%      ksoftirqd/0  [kernel.kallsyms]           [k] inet_getpeer
+   8,19%      ksoftirqd/0  [kernel.kallsyms]           [k] peer_avl_rebalance.isra.1
+   4,81%      ksoftirqd/0  [kernel.kallsyms]           [k] sha_transform
+   3,64%      ksoftirqd/0  [kernel.kallsyms]           [k] fib_table_lookup
+   2,36%      ksoftirqd/0  [ixgbe]                     [k] ixgbe_poll
+   2,16%      ksoftirqd/0  [kernel.kallsyms]           [k] __ip_route_output_key
+   2,11%      ksoftirqd/0  [kernel.kallsyms]           [k] kernel_map_pages
+   2,11%      ksoftirqd/0  [kernel.kallsyms]           [k] ip_route_input_common
+   2,01%      ksoftirqd/0  [kernel.kallsyms]           [k] __inet_lookup_established
+   1,83%      ksoftirqd/0  [kernel.kallsyms]           [k] md5_transform
+   1,75%      ksoftirqd/0  [kernel.kallsyms]           [k] check_leaf.isra.9
+   1,49%      ksoftirqd/0  [kernel.kallsyms]           [k] ipt_do_table
+   1,46%      ksoftirqd/0  [kernel.kallsyms]           [k] hrtimer_interrupt
+   1,45%      ksoftirqd/0  [kernel.kallsyms]           [k] kmem_cache_alloc
+   1,29%      ksoftirqd/0  [kernel.kallsyms]           [k] inet_csk_search_req
+   1,29%      ksoftirqd/0  [kernel.kallsyms]           [k] __netif_receive_skb
+   1,16%      ksoftirqd/0  [kernel.kallsyms]           [k] copy_user_generic_string
+   1,15%      ksoftirqd/0  [kernel.kallsyms]           [k] kmem_cache_free
+   1,02%      ksoftirqd/0  [kernel.kallsyms]           [k] tcp_make_synack
+   0,93%      ksoftirqd/0  [kernel.kallsyms]           [k] _raw_spin_lock_bh
+   0,87%      ksoftirqd/0  [kernel.kallsyms]           [k] __call_rcu
+   0,84%      ksoftirqd/0  [kernel.kallsyms]           [k] rt_garbage_collect
+   0,84%      ksoftirqd/0  [kernel.kallsyms]           [k] fib_rules_lookup

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Hans Schillstrom <hans.schillstrom@ericsson.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 95e61596e605..f9ee7417f6a0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -377,7 +377,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 
 	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
-			   sk->sk_protocol, inet_sk_flowi_flags(sk),
+			   sk->sk_protocol,
+			   inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS,
 			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
 			   ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
 	security_req_classify_flow(req, flowi4_to_flowi(fl4));
-- 
cgit v1.2.3-55-g7522


From fff3269907897ee91406ece125795f53e722677e Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 1 Jun 2012 01:47:50 +0000
Subject: tcp: reflect SYN queue_mapping into SYNACK packets

While testing how Linux behaves under a SYNFLOOD attack on a multiqueue
device (ixgbe), I found that SYNACK messages were dropped at the Qdisc
level because we send them all on a single queue.

The obvious choice is to reflect the incoming SYN packet's queue_mapping
into the SYNACK packet.

Under stress, my machine could only send 25,000 SYNACKs per second (for
200,000 incoming SYNs per second). NIC: ixgbe with 16 rx/tx queues.

After patch, not a single SYNACK is dropped.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Hans Schillstrom <hans.schillstrom@ericsson.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 9 ++++++---
 net/ipv6/tcp_ipv6.c | 9 ++++++---
 2 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a43b87dfe800..c8d28c433b2b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -824,7 +824,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
-			      struct request_values *rvp)
+			      struct request_values *rvp,
+			      u16 queue_mapping)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -840,6 +841,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
 
+		skb_set_queue_mapping(skb, queue_mapping);
 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 					    ireq->rmt_addr,
 					    ireq->opt);
@@ -854,7 +856,7 @@ static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
 			      struct request_values *rvp)
 {
 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
-	return tcp_v4_send_synack(sk, NULL, req, rvp);
+	return tcp_v4_send_synack(sk, NULL, req, rvp, 0);
 }
 
 /*
@@ -1422,7 +1424,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
 
 	if (tcp_v4_send_synack(sk, dst, req,
-			       (struct request_values *)&tmp_ext) ||
+			       (struct request_values *)&tmp_ext,
+			       skb_get_queue_mapping(skb)) ||
 	    want_cookie)
 		goto drop_and_free;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 554d5999abc4..3a9aec29581a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -476,7 +476,8 @@ out:
 
 
 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
-			      struct request_values *rvp)
+			      struct request_values *rvp,
+			      u16 queue_mapping)
 {
 	struct inet6_request_sock *treq = inet6_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -513,6 +514,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
 
 		fl6.daddr = treq->rmt_addr;
+		skb_set_queue_mapping(skb, queue_mapping);
 		err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
 		err = net_xmit_eval(err);
 	}
@@ -528,7 +530,7 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
 			     struct request_values *rvp)
 {
 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
-	return tcp_v6_send_synack(sk, req, rvp);
+	return tcp_v6_send_synack(sk, req, rvp, 0);
 }
 
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
@@ -1213,7 +1215,8 @@ have_isn:
 	security_inet_conn_request(sk, skb, req);
 
 	if (tcp_v6_send_synack(sk, req,
-			       (struct request_values *)&tmp_ext) ||
+			       (struct request_values *)&tmp_ext,
+			       skb_get_queue_mapping(skb)) ||
 	    want_cookie)
 		goto drop_and_free;
 
-- 
cgit v1.2.3-55-g7522


From f309532bf3e1cc1b787403d84e3039812a7dbe50 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sat, 2 Jun 2012 15:21:43 -0700
Subject: tty: Revert the tty locking series, it needs more work

This reverts the tty layer change to use per-tty locking, because it's
not correct yet, and fixing it will require some more deep surgery.

The main revert is d29f3ef39be4 ("tty_lock: Localise the lock"), but
there are several smaller commits that built upon it; they also get
reverted here. The list of reverted commits is:

  fde86d310886 - tty: add lockdep annotations
  8f6576ad476b - tty: fix ldisc lock inversion trace
  d3ca8b64b97e - pty: Fix lock inversion
  b1d679afd766 - tty: drop the pty lock during hangup
  abcefe5fc357 - tty/amiserial: Add missing argument for tty_unlock()
  fd11b42e3598 - cris: fix missing tty arg in wait_event_interruptible_tty call
  d29f3ef39be4 - tty_lock: Localise the lock

The revert had a trivial conflict in the 68360serial.c staging driver
that got removed in the meantime.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/tty/amiserial.c      | 14 ++++-----
 drivers/tty/cyclades.c       |  2 +-
 drivers/tty/n_r3964.c        | 11 ++++---
 drivers/tty/pty.c            | 25 +++++++---------
 drivers/tty/serial/crisv10.c |  8 ++---
 drivers/tty/synclink.c       |  4 +--
 drivers/tty/synclink_gt.c    |  4 +--
 drivers/tty/synclinkmp.c     |  4 +--
 drivers/tty/tty_io.c         | 67 +++++++++++++++++------------------------
 drivers/tty/tty_ldisc.c      | 67 ++++++++++++++++++-----------------------
 drivers/tty/tty_mutex.c      | 71 ++++++++++----------------------------------
 drivers/tty/tty_port.c       |  6 ++--
 include/linux/tty.h          | 23 ++++++--------
 net/bluetooth/rfcomm/tty.c   |  4 +--
 14 files changed, 119 insertions(+), 191 deletions(-)

(limited to 'net')

diff --git a/drivers/tty/amiserial.c b/drivers/tty/amiserial.c
index 35819e312624..6cc4358f68c1 100644
--- a/drivers/tty/amiserial.c
+++ b/drivers/tty/amiserial.c
@@ -1033,7 +1033,7 @@ static int get_serial_info(struct tty_struct *tty, struct serial_state *state,
 	if (!retinfo)
 		return -EFAULT;
 	memset(&tmp, 0, sizeof(tmp));
-	tty_lock(tty);
+	tty_lock();
 	tmp.line = tty->index;
 	tmp.port = state->port;
 	tmp.flags = state->tport.flags;
@@ -1042,7 +1042,7 @@ static int get_serial_info(struct tty_struct *tty, struct serial_state *state,
 	tmp.close_delay = state->tport.close_delay;
 	tmp.closing_wait = state->tport.closing_wait;
 	tmp.custom_divisor = state->custom_divisor;
-	tty_unlock(tty);
+	tty_unlock();
 	if (copy_to_user(retinfo,&tmp,sizeof(*retinfo)))
 		return -EFAULT;
 	return 0;
@@ -1059,12 +1059,12 @@ static int set_serial_info(struct tty_struct *tty, struct serial_state *state,
 	if (copy_from_user(&new_serial,new_info,sizeof(new_serial)))
 		return -EFAULT;
 
-	tty_lock(tty);
+	tty_lock();
 	change_spd = ((new_serial.flags ^ port->flags) & ASYNC_SPD_MASK) ||
 		new_serial.custom_divisor != state->custom_divisor;
 	if (new_serial.irq || new_serial.port != state->port ||
 			new_serial.xmit_fifo_size != state->xmit_fifo_size) {
-		tty_unlock(tty);
+		tty_unlock();
 		return -EINVAL;
 	}
   
@@ -1074,7 +1074,7 @@ static int set_serial_info(struct tty_struct *tty, struct serial_state *state,
 		    (new_serial.xmit_fifo_size != state->xmit_fifo_size) ||
 		    ((new_serial.flags & ~ASYNC_USR_MASK) !=
 		     (port->flags & ~ASYNC_USR_MASK))) {
-			tty_unlock(tty);
+			tty_unlock();
 			return -EPERM;
 		}
 		port->flags = ((port->flags & ~ASYNC_USR_MASK) |
@@ -1084,7 +1084,7 @@ static int set_serial_info(struct tty_struct *tty, struct serial_state *state,
 	}
 
 	if (new_serial.baud_base < 9600) {
-		tty_unlock(tty);
+		tty_unlock();
 		return -EINVAL;
 	}
 
@@ -1116,7 +1116,7 @@ check_and_exit:
 		}
 	} else
 		retval = startup(tty, state);
-	tty_unlock(tty);
+	tty_unlock();
 	return retval;
 }
 
diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c
index 6984e1a2686a..e61cabdd69df 100644
--- a/drivers/tty/cyclades.c
+++ b/drivers/tty/cyclades.c
@@ -1599,7 +1599,7 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 	 * If the port is the middle of closing, bail out now
 	 */
 	if (tty_hung_up_p(filp) || (info->port.flags & ASYNC_CLOSING)) {
-		wait_event_interruptible_tty(tty, info->port.close_wait,
+		wait_event_interruptible_tty(info->port.close_wait,
 				!(info->port.flags & ASYNC_CLOSING));
 		return (info->port.flags & ASYNC_HUP_NOTIFY) ? -EAGAIN: -ERESTARTSYS;
 	}
diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c
index 656ad93bbc96..5c6c31459a2f 100644
--- a/drivers/tty/n_r3964.c
+++ b/drivers/tty/n_r3964.c
@@ -1065,8 +1065,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file,
 
 	TRACE_L("read()");
 
-	/* FIXME: should use a private lock */
-	tty_lock(tty);
+	tty_lock();
 
 	pClient = findClient(pInfo, task_pid(current));
 	if (pClient) {
@@ -1078,7 +1077,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file,
 				goto unlock;
 			}
 			/* block until there is a message: */
-			wait_event_interruptible_tty(tty, pInfo->read_wait,
+			wait_event_interruptible_tty(pInfo->read_wait,
 					(pMsg = remove_msg(pInfo, pClient)));
 		}
 
@@ -1108,7 +1107,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file,
 	}
 	ret = -EPERM;
 unlock:
-	tty_unlock(tty);
+	tty_unlock();
 	return ret;
 }
 
@@ -1157,7 +1156,7 @@ static ssize_t r3964_write(struct tty_struct *tty, struct file *file,
 	pHeader->locks = 0;
 	pHeader->owner = NULL;
 
-	tty_lock(tty);
+	tty_lock();
 
 	pClient = findClient(pInfo, task_pid(current));
 	if (pClient) {
@@ -1176,7 +1175,7 @@ static ssize_t r3964_write(struct tty_struct *tty, struct file *file,
 	add_tx_queue(pInfo, pHeader);
 	trigger_transmit(pInfo);
 
-	tty_unlock(tty);
+	tty_unlock();
 
 	return 0;
 }
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 65c7c62c7aae..5505ffc91da4 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -47,7 +47,6 @@ static void pty_close(struct tty_struct *tty, struct file *filp)
 	wake_up_interruptible(&tty->read_wait);
 	wake_up_interruptible(&tty->write_wait);
 	tty->packet = 0;
-	/* Review - krefs on tty_link ?? */
 	if (!tty->link)
 		return;
 	tty->link->packet = 0;
@@ -63,9 +62,9 @@ static void pty_close(struct tty_struct *tty, struct file *filp)
 		        mutex_unlock(&devpts_mutex);
 		}
 #endif
-		tty_unlock(tty);
+		tty_unlock();
 		tty_vhangup(tty->link);
-		tty_lock(tty);
+		tty_lock();
 	}
 }
 
@@ -623,27 +622,26 @@ static int ptmx_open(struct inode *inode, struct file *filp)
 		return retval;
 
 	/* find a device that is not in use. */
-	mutex_lock(&devpts_mutex);
+	tty_lock();
 	index = devpts_new_index(inode);
+	tty_unlock();
 	if (index < 0) {
 		retval = index;
 		goto err_file;
 	}
 
-	mutex_unlock(&devpts_mutex);
-
 	mutex_lock(&tty_mutex);
+	mutex_lock(&devpts_mutex);
 	tty = tty_init_dev(ptm_driver, index);
+	mutex_unlock(&devpts_mutex);
+	tty_lock();
+	mutex_unlock(&tty_mutex);
 
 	if (IS_ERR(tty)) {
 		retval = PTR_ERR(tty);
 		goto out;
 	}
 
-	/* The tty returned here is locked so we can safely
-	   drop the mutex */
-	mutex_unlock(&tty_mutex);
-
 	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
 
 	tty_add_file(tty, filp);
@@ -656,17 +654,16 @@ static int ptmx_open(struct inode *inode, struct file *filp)
 	if (retval)
 		goto err_release;
 
-	tty_unlock(tty);
+	tty_unlock();
 	return 0;
 err_release:
-	tty_unlock(tty);
+	tty_unlock();
 	tty_release(inode, filp);
 	return retval;
 out:
-	mutex_unlock(&tty_mutex);
 	devpts_kill_index(inode, index);
+	tty_unlock();
 err_file:
-        mutex_unlock(&devpts_mutex);
 	tty_free_file(filp);
 	return retval;
 }
diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c
index 7264d4d26717..80b6b1b1f725 100644
--- a/drivers/tty/serial/crisv10.c
+++ b/drivers/tty/serial/crisv10.c
@@ -3976,7 +3976,7 @@ block_til_ready(struct tty_struct *tty, struct file * filp,
 	 */
 	if (tty_hung_up_p(filp) ||
 	    (info->flags & ASYNC_CLOSING)) {
-		wait_event_interruptible_tty(tty, info->close_wait,
+		wait_event_interruptible_tty(info->close_wait,
 			!(info->flags & ASYNC_CLOSING));
 #ifdef SERIAL_DO_RESTART
 		if (info->flags & ASYNC_HUP_NOTIFY)
@@ -4052,9 +4052,9 @@ block_til_ready(struct tty_struct *tty, struct file * filp,
 		printk("block_til_ready blocking: ttyS%d, count = %d\n",
 		       info->line, info->count);
 #endif
-		tty_unlock(tty);
+		tty_unlock();
 		schedule();
-		tty_lock(tty);
+		tty_lock();
 	}
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&info->open_wait, &wait);
@@ -4115,7 +4115,7 @@ rs_open(struct tty_struct *tty, struct file * filp)
 	 */
 	if (tty_hung_up_p(filp) ||
 	    (info->flags & ASYNC_CLOSING)) {
-		wait_event_interruptible_tty(tty, info->close_wait,
+		wait_event_interruptible_tty(info->close_wait,
 			!(info->flags & ASYNC_CLOSING));
 #ifdef SERIAL_DO_RESTART
 		return ((info->flags & ASYNC_HUP_NOTIFY) ?
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index 5ed0daae6564..593d40ad0a6b 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -3338,9 +3338,9 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 			printk("%s(%d):block_til_ready blocking on %s count=%d\n",
 				 __FILE__,__LINE__, tty->driver->name, port->count );
 				 
-		tty_unlock(tty);
+		tty_unlock();
 		schedule();
-		tty_lock(tty);
+		tty_lock();
 	}
 	
 	set_current_state(TASK_RUNNING);
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 45b43f11ca39..aa1debf97cc7 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -3336,9 +3336,9 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		}
 
 		DBGINFO(("%s block_til_ready wait\n", tty->driver->name));
-		tty_unlock(tty);
+		tty_unlock();
 		schedule();
-		tty_lock(tty);
+		tty_lock();
 	}
 
 	set_current_state(TASK_RUNNING);
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index 4a1e4f07765b..a3dddc12d2fe 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -3357,9 +3357,9 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 			printk("%s(%d):%s block_til_ready() count=%d\n",
 				 __FILE__,__LINE__, tty->driver->name, port->count );
 
-		tty_unlock(tty);
+		tty_unlock();
 		schedule();
-		tty_lock(tty);
+		tty_lock();
 	}
 
 	set_current_state(TASK_RUNNING);
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 9e930c009bf2..b425c79675ad 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -185,7 +185,6 @@ void free_tty_struct(struct tty_struct *tty)
 		put_device(tty->dev);
 	kfree(tty->write_buf);
 	tty_buffer_free_all(tty);
-	tty->magic = 0xDEADDEAD;
 	kfree(tty);
 }
 
@@ -574,7 +573,7 @@ void __tty_hangup(struct tty_struct *tty)
 	}
 	spin_unlock(&redirect_lock);
 
-	tty_lock(tty);
+	tty_lock();
 
 	/* some functions below drop BTM, so we need this bit */
 	set_bit(TTY_HUPPING, &tty->flags);
@@ -667,7 +666,7 @@ void __tty_hangup(struct tty_struct *tty)
 	clear_bit(TTY_HUPPING, &tty->flags);
 	tty_ldisc_enable(tty);
 
-	tty_unlock(tty);
+	tty_unlock();
 
 	if (f)
 		fput(f);
@@ -1104,12 +1103,12 @@ void tty_write_message(struct tty_struct *tty, char *msg)
 {
 	if (tty) {
 		mutex_lock(&tty->atomic_write_lock);
-		tty_lock(tty);
+		tty_lock();
 		if (tty->ops->write && !test_bit(TTY_CLOSING, &tty->flags)) {
-			tty_unlock(tty);
+			tty_unlock();
 			tty->ops->write(tty, msg, strlen(msg));
 		} else
-			tty_unlock(tty);
+			tty_unlock();
 		tty_write_unlock(tty);
 	}
 	return;
@@ -1404,7 +1403,6 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx)
 	}
 	initialize_tty_struct(tty, driver, idx);
 
-	tty_lock(tty);
 	retval = tty_driver_install_tty(driver, tty);
 	if (retval < 0)
 		goto err_deinit_tty;
@@ -1417,11 +1415,9 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx)
 	retval = tty_ldisc_setup(tty, tty->link);
 	if (retval)
 		goto err_release_tty;
-	/* Return the tty locked so that it cannot vanish under the caller */
 	return tty;
 
 err_deinit_tty:
-	tty_unlock(tty);
 	deinitialize_tty_struct(tty);
 	free_tty_struct(tty);
 err_module_put:
@@ -1430,7 +1426,6 @@ err_module_put:
 
 	/* call the tty release_tty routine to clean out this slot */
 err_release_tty:
-	tty_unlock(tty);
 	printk_ratelimited(KERN_INFO "tty_init_dev: ldisc open failed, "
 				 "clearing slot %d\n", idx);
 	release_tty(tty, idx);
@@ -1633,7 +1628,7 @@ int tty_release(struct inode *inode, struct file *filp)
 	if (tty_paranoia_check(tty, inode, __func__))
 		return 0;
 
-	tty_lock(tty);
+	tty_lock();
 	check_tty_count(tty, __func__);
 
 	__tty_fasync(-1, filp, 0);
@@ -1642,11 +1637,10 @@ int tty_release(struct inode *inode, struct file *filp)
 	pty_master = (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 		      tty->driver->subtype == PTY_TYPE_MASTER);
 	devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
-	/* Review: parallel close */
 	o_tty = tty->link;
 
 	if (tty_release_checks(tty, o_tty, idx)) {
-		tty_unlock(tty);
+		tty_unlock();
 		return 0;
 	}
 
@@ -1658,7 +1652,7 @@ int tty_release(struct inode *inode, struct file *filp)
 	if (tty->ops->close)
 		tty->ops->close(tty, filp);
 
-	tty_unlock(tty);
+	tty_unlock();
 	/*
 	 * Sanity check: if tty->count is going to zero, there shouldn't be
 	 * any waiters on tty->read_wait or tty->write_wait.  We test the
@@ -1681,7 +1675,7 @@ int tty_release(struct inode *inode, struct file *filp)
 		   opens on /dev/tty */
 
 		mutex_lock(&tty_mutex);
-		tty_lock_pair(tty, o_tty);
+		tty_lock();
 		tty_closing = tty->count <= 1;
 		o_tty_closing = o_tty &&
 			(o_tty->count <= (pty_master ? 1 : 0));
@@ -1712,7 +1706,7 @@ int tty_release(struct inode *inode, struct file *filp)
 
 		printk(KERN_WARNING "%s: %s: read/write wait queue active!\n",
 				__func__, tty_name(tty, buf));
-		tty_unlock_pair(tty, o_tty);
+		tty_unlock();
 		mutex_unlock(&tty_mutex);
 		schedule();
 	}
@@ -1775,7 +1769,7 @@ int tty_release(struct inode *inode, struct file *filp)
 
 	/* check whether both sides are closing ... */
 	if (!tty_closing || (o_tty && !o_tty_closing)) {
-		tty_unlock_pair(tty, o_tty);
+		tty_unlock();
 		return 0;
 	}
 
@@ -1788,16 +1782,14 @@ int tty_release(struct inode *inode, struct file *filp)
 	tty_ldisc_release(tty, o_tty);
 	/*
 	 * The release_tty function takes care of the details of clearing
-	 * the slots and preserving the termios structure. The tty_unlock_pair
-	 * should be safe as we keep a kref while the tty is locked (so the
-	 * unlock never unlocks a freed tty).
+	 * the slots and preserving the termios structure.
 	 */
 	release_tty(tty, idx);
-	tty_unlock_pair(tty, o_tty);
 
 	/* Make this pty number available for reallocation */
 	if (devpts)
 		devpts_kill_index(inode, idx);
+	tty_unlock();
 	return 0;
 }
 
@@ -1901,9 +1893,6 @@ static struct tty_driver *tty_lookup_driver(dev_t device, struct file *filp,
  *	Locking: tty_mutex protects tty, tty_lookup_driver and tty_init_dev.
  *		 tty->count should protect the rest.
  *		 ->siglock protects ->signal/->sighand
- *
- *	Note: the tty_unlock/lock cases without a ref are only safe due to
- *	tty_mutex
  */
 
 static int tty_open(struct inode *inode, struct file *filp)
@@ -1927,7 +1916,8 @@ retry_open:
 	retval = 0;
 
 	mutex_lock(&tty_mutex);
-	/* This is protected by the tty_mutex */
+	tty_lock();
+
 	tty = tty_open_current_tty(device, filp);
 	if (IS_ERR(tty)) {
 		retval = PTR_ERR(tty);
@@ -1948,19 +1938,17 @@ retry_open:
 	}
 
 	if (tty) {
-		tty_lock(tty);
 		retval = tty_reopen(tty);
-		if (retval < 0) {
-			tty_unlock(tty);
+		if (retval)
 			tty = ERR_PTR(retval);
-		}
-	} else	/* Returns with the tty_lock held for now */
+	} else
 		tty = tty_init_dev(driver, index);
 
 	mutex_unlock(&tty_mutex);
 	if (driver)
 		tty_driver_kref_put(driver);
 	if (IS_ERR(tty)) {
+		tty_unlock();
 		retval = PTR_ERR(tty);
 		goto err_file;
 	}
@@ -1989,7 +1977,7 @@ retry_open:
 		printk(KERN_DEBUG "%s: error %d in opening %s...\n", __func__,
 				retval, tty->name);
 #endif
-		tty_unlock(tty); /* need to call tty_release without BTM */
+		tty_unlock(); /* need to call tty_release without BTM */
 		tty_release(inode, filp);
 		if (retval != -ERESTARTSYS)
 			return retval;
@@ -2001,15 +1989,17 @@ retry_open:
 		/*
 		 * Need to reset f_op in case a hangup happened.
 		 */
+		tty_lock();
 		if (filp->f_op == &hung_up_tty_fops)
 			filp->f_op = &tty_fops;
+		tty_unlock();
 		goto retry_open;
 	}
-	tty_unlock(tty);
+	tty_unlock();
 
 
 	mutex_lock(&tty_mutex);
-	tty_lock(tty);
+	tty_lock();
 	spin_lock_irq(&current->sighand->siglock);
 	if (!noctty &&
 	    current->signal->leader &&
@@ -2017,10 +2007,11 @@ retry_open:
 	    tty->session == NULL)
 		__proc_set_tty(current, tty);
 	spin_unlock_irq(&current->sighand->siglock);
-	tty_unlock(tty);
+	tty_unlock();
 	mutex_unlock(&tty_mutex);
 	return 0;
 err_unlock:
+	tty_unlock();
 	mutex_unlock(&tty_mutex);
 	/* after locks to avoid deadlock */
 	if (!IS_ERR_OR_NULL(driver))
@@ -2103,13 +2094,10 @@ out:
 
 static int tty_fasync(int fd, struct file *filp, int on)
 {
-	struct tty_struct *tty = file_tty(filp);
 	int retval;
-
-	tty_lock(tty);
+	tty_lock();
 	retval = __tty_fasync(fd, filp, on);
-	tty_unlock(tty);
-
+	tty_unlock();
 	return retval;
 }
 
@@ -2946,7 +2934,6 @@ void initialize_tty_struct(struct tty_struct *tty,
 	tty->pgrp = NULL;
 	tty->overrun_time = jiffies;
 	tty_buffer_init(tty);
-	mutex_init(&tty->legacy_mutex);
 	mutex_init(&tty->termios_mutex);
 	mutex_init(&tty->ldisc_mutex);
 	init_waitqueue_head(&tty->write_wait);
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index ba8be396a621..9911eb6b34cd 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -568,7 +568,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 	if (IS_ERR(new_ldisc))
 		return PTR_ERR(new_ldisc);
 
-	tty_lock(tty);
+	tty_lock();
 	/*
 	 *	We need to look at the tty locking here for pty/tty pairs
 	 *	when both sides try to change in parallel.
@@ -582,12 +582,12 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 	 */
 
 	if (tty->ldisc->ops->num == ldisc) {
-		tty_unlock(tty);
+		tty_unlock();
 		tty_ldisc_put(new_ldisc);
 		return 0;
 	}
 
-	tty_unlock(tty);
+	tty_unlock();
 	/*
 	 *	Problem: What do we do if this blocks ?
 	 *	We could deadlock here
@@ -595,7 +595,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 
 	tty_wait_until_sent(tty, 0);
 
-	tty_lock(tty);
+	tty_lock();
 	mutex_lock(&tty->ldisc_mutex);
 
 	/*
@@ -605,10 +605,10 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 
 	while (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
 		mutex_unlock(&tty->ldisc_mutex);
-		tty_unlock(tty);
+		tty_unlock();
 		wait_event(tty_ldisc_wait,
 			test_bit(TTY_LDISC_CHANGING, &tty->flags) == 0);
-		tty_lock(tty);
+		tty_lock();
 		mutex_lock(&tty->ldisc_mutex);
 	}
 
@@ -623,7 +623,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 
 	o_ldisc = tty->ldisc;
 
-	tty_unlock(tty);
+	tty_unlock();
 	/*
 	 *	Make sure we don't change while someone holds a
 	 *	reference to the line discipline. The TTY_LDISC bit
@@ -650,7 +650,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 
 	retval = tty_ldisc_wait_idle(tty, 5 * HZ);
 
-	tty_lock(tty);
+	tty_lock();
 	mutex_lock(&tty->ldisc_mutex);
 
 	/* handle wait idle failure locked */
@@ -665,7 +665,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 		clear_bit(TTY_LDISC_CHANGING, &tty->flags);
 		mutex_unlock(&tty->ldisc_mutex);
 		tty_ldisc_put(new_ldisc);
-		tty_unlock(tty);
+		tty_unlock();
 		return -EIO;
 	}
 
@@ -708,7 +708,7 @@ enable:
 	if (o_work)
 		schedule_work(&o_tty->buf.work);
 	mutex_unlock(&tty->ldisc_mutex);
-	tty_unlock(tty);
+	tty_unlock();
 	return retval;
 }
 
@@ -816,11 +816,11 @@ void tty_ldisc_hangup(struct tty_struct *tty)
 	 * need to wait for another function taking the BTM
 	 */
 	clear_bit(TTY_LDISC, &tty->flags);
-	tty_unlock(tty);
+	tty_unlock();
 	cancel_work_sync(&tty->buf.work);
 	mutex_unlock(&tty->ldisc_mutex);
 retry:
-	tty_lock(tty);
+	tty_lock();
 	mutex_lock(&tty->ldisc_mutex);
 
 	/* At this point we have a closed ldisc and we want to
@@ -831,7 +831,7 @@ retry:
 		if (atomic_read(&tty->ldisc->users) != 1) {
 			char cur_n[TASK_COMM_LEN], tty_n[64];
 			long timeout = 3 * HZ;
-			tty_unlock(tty);
+			tty_unlock();
 
 			while (tty_ldisc_wait_idle(tty, timeout) == -EBUSY) {
 				timeout = MAX_SCHEDULE_TIMEOUT;
@@ -894,23 +894,6 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 	tty_ldisc_enable(tty);
 	return 0;
 }
-
-static void tty_ldisc_kill(struct tty_struct *tty)
-{
-	mutex_lock(&tty->ldisc_mutex);
-	/*
-	 * Now kill off the ldisc
-	 */
-	tty_ldisc_close(tty, tty->ldisc);
-	tty_ldisc_put(tty->ldisc);
-	/* Force an oops if we mess this up */
-	tty->ldisc = NULL;
-
-	/* Ensure the next open requests the N_TTY ldisc */
-	tty_set_termios_ldisc(tty, N_TTY);
-	mutex_unlock(&tty->ldisc_mutex);
-}
-
 /**
  *	tty_ldisc_release		-	release line discipline
  *	@tty: tty being shut down
@@ -929,19 +912,27 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 	 * race with the set_ldisc code path.
 	 */
 
-	tty_unlock_pair(tty, o_tty);
+	tty_unlock();
 	tty_ldisc_halt(tty);
 	tty_ldisc_flush_works(tty);
-	if (o_tty) {
-		tty_ldisc_halt(o_tty);
-		tty_ldisc_flush_works(o_tty);
-	}
-	tty_lock_pair(tty, o_tty);
+	tty_lock();
 
+	mutex_lock(&tty->ldisc_mutex);
+	/*
+	 * Now kill off the ldisc
+	 */
+	tty_ldisc_close(tty, tty->ldisc);
+	tty_ldisc_put(tty->ldisc);
+	/* Force an oops if we mess this up */
+	tty->ldisc = NULL;
+
+	/* Ensure the next open requests the N_TTY ldisc */
+	tty_set_termios_ldisc(tty, N_TTY);
+	mutex_unlock(&tty->ldisc_mutex);
 
-	tty_ldisc_kill(tty);
+	/* This will need doing differently if we need to lock */
 	if (o_tty)
-		tty_ldisc_kill(o_tty);
+		tty_ldisc_release(o_tty, NULL);
 
 	/* And the memory resources remaining (buffers, termios) will be
 	   disposed of when the kref hits zero */
diff --git a/drivers/tty/tty_mutex.c b/drivers/tty/tty_mutex.c
index 67feac9e6ebb..9ff986c32a21 100644
--- a/drivers/tty/tty_mutex.c
+++ b/drivers/tty/tty_mutex.c
@@ -4,70 +4,29 @@
 #include <linux/semaphore.h>
 #include <linux/sched.h>
 
-/* Legacy tty mutex glue */
-
-enum {
-	TTY_MUTEX_NORMAL,
-	TTY_MUTEX_NESTED,
-};
+/*
+ * The 'big tty mutex'
+ *
+ * This mutex is taken and released by tty_lock() and tty_unlock(),
+ * replacing the older big kernel lock.
+ * It can no longer be taken recursively, and does not get
+ * released implicitly while sleeping.
+ *
+ * Don't use in new code.
+ */
+static DEFINE_MUTEX(big_tty_mutex);
 
 /*
  * Getting the big tty mutex.
  */
-
-static void __lockfunc tty_lock_nested(struct tty_struct *tty,
-				       unsigned int subclass)
+void __lockfunc tty_lock(void)
 {
-	if (tty->magic != TTY_MAGIC) {
-		printk(KERN_ERR "L Bad %p\n", tty);
-		WARN_ON(1);
-		return;
-	}
-	tty_kref_get(tty);
-	mutex_lock_nested(&tty->legacy_mutex, subclass);
-}
-
-void __lockfunc tty_lock(struct tty_struct *tty)
-{
-	return tty_lock_nested(tty, TTY_MUTEX_NORMAL);
+	mutex_lock(&big_tty_mutex);
 }
 EXPORT_SYMBOL(tty_lock);
 
-void __lockfunc tty_unlock(struct tty_struct *tty)
+void __lockfunc tty_unlock(void)
 {
-	if (tty->magic != TTY_MAGIC) {
-		printk(KERN_ERR "U Bad %p\n", tty);
-		WARN_ON(1);
-		return;
-	}
-	mutex_unlock(&tty->legacy_mutex);
-	tty_kref_put(tty);
+	mutex_unlock(&big_tty_mutex);
 }
 EXPORT_SYMBOL(tty_unlock);
-
-/*
- * Getting the big tty mutex for a pair of ttys with lock ordering
- * On a non pty/tty pair tty2 can be NULL which is just fine.
- */
-void __lockfunc tty_lock_pair(struct tty_struct *tty,
-					struct tty_struct *tty2)
-{
-	if (tty < tty2) {
-		tty_lock(tty);
-		tty_lock_nested(tty2, TTY_MUTEX_NESTED);
-	} else {
-		if (tty2 && tty2 != tty)
-			tty_lock(tty2);
-		tty_lock_nested(tty, TTY_MUTEX_NESTED);
-	}
-}
-EXPORT_SYMBOL(tty_lock_pair);
-
-void __lockfunc tty_unlock_pair(struct tty_struct *tty,
-						struct tty_struct *tty2)
-{
-	tty_unlock(tty);
-	if (tty2 && tty2 != tty)
-		tty_unlock(tty2);
-}
-EXPORT_SYMBOL(tty_unlock_pair);
diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index d9cca95a5452..bf6e238146ae 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -230,7 +230,7 @@ int tty_port_block_til_ready(struct tty_port *port,
 
 	/* block if port is in the process of being closed */
 	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
-		wait_event_interruptible_tty(tty, port->close_wait,
+		wait_event_interruptible_tty(port->close_wait,
 				!(port->flags & ASYNC_CLOSING));
 		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
@@ -296,9 +296,9 @@ int tty_port_block_til_ready(struct tty_port *port,
 			retval = -ERESTARTSYS;
 			break;
 		}
-		tty_unlock(tty);
+		tty_unlock();
 		schedule();
-		tty_lock(tty);
+		tty_lock();
 	}
 	finish_wait(&port->open_wait, &wait);
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 4990ef2b1fb7..9f47ab540f65 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -268,7 +268,6 @@ struct tty_struct {
 	struct mutex ldisc_mutex;
 	struct tty_ldisc *ldisc;
 
-	struct mutex legacy_mutex;
 	struct mutex termios_mutex;
 	spinlock_t ctrl_lock;
 	/* Termios values are protected by the termios mutex */
@@ -606,12 +605,8 @@ extern long vt_compat_ioctl(struct tty_struct *tty,
 
 /* tty_mutex.c */
 /* functions for preparation of BKL removal */
-extern void __lockfunc tty_lock(struct tty_struct *tty);
-extern void __lockfunc tty_unlock(struct tty_struct *tty);
-extern void __lockfunc tty_lock_pair(struct tty_struct *tty,
-				struct tty_struct *tty2);
-extern void __lockfunc tty_unlock_pair(struct tty_struct *tty,
-				struct tty_struct *tty2);
+extern void __lockfunc tty_lock(void) __acquires(tty_lock);
+extern void __lockfunc tty_unlock(void) __releases(tty_lock);
 
 /*
  * this shall be called only from where BTM is held (like close)
@@ -626,9 +621,9 @@ extern void __lockfunc tty_unlock_pair(struct tty_struct *tty,
 static inline void tty_wait_until_sent_from_close(struct tty_struct *tty,
 		long timeout)
 {
-	tty_unlock(tty); /* tty->ops->close holds the BTM, drop it while waiting */
+	tty_unlock(); /* tty->ops->close holds the BTM, drop it while waiting */
 	tty_wait_until_sent(tty, timeout);
-	tty_lock(tty);
+	tty_lock();
 }
 
 /*
@@ -643,16 +638,16 @@ static inline void tty_wait_until_sent_from_close(struct tty_struct *tty,
  *
  * Do not use in new code.
  */
-#define wait_event_interruptible_tty(tty, wq, condition)		\
+#define wait_event_interruptible_tty(wq, condition)			\
 ({									\
 	int __ret = 0;							\
 	if (!(condition)) {						\
-		__wait_event_interruptible_tty(tty, wq, condition, __ret);	\
+		__wait_event_interruptible_tty(wq, condition, __ret);	\
 	}								\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_tty(tty, wq, condition, ret)		\
+#define __wait_event_interruptible_tty(wq, condition, ret)		\
 do {									\
 	DEFINE_WAIT(__wait);						\
 									\
@@ -661,9 +656,9 @@ do {									\
 		if (condition)						\
 			break;						\
 		if (!signal_pending(current)) {				\
-			tty_unlock(tty);					\
+			tty_unlock();					\
 			schedule();					\
-			tty_lock(tty);					\
+			tty_lock();					\
 			continue;					\
 		}							\
 		ret = -ERESTARTSYS;					\
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index aa5d73b786ac..d1820ff14aee 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -710,9 +710,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
 			break;
 		}
 
-		tty_unlock(tty);
+		tty_unlock();
 		schedule();
-		tty_lock(tty);
+		tty_lock();
 	}
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&dev->wait, &wait);
-- 
cgit v1.2.3-55-g7522


From bec4596b4e6770c7037f21f6bd27567b152dc0d6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Mon, 4 Jun 2012 00:18:19 +0000
Subject: drop_monitor: dont sleep in atomic context

drop_monitor calls several sleeping functions while in atomic context.

 BUG: sleeping function called from invalid context at mm/slub.c:943
 in_atomic(): 1, irqs_disabled(): 0, pid: 2103, name: kworker/0:2
 Pid: 2103, comm: kworker/0:2 Not tainted 3.5.0-rc1+ #55
 Call Trace:
  [<ffffffff810697ca>] __might_sleep+0xca/0xf0
  [<ffffffff811345a3>] kmem_cache_alloc_node+0x1b3/0x1c0
  [<ffffffff8105578c>] ? queue_delayed_work_on+0x11c/0x130
  [<ffffffff815343fb>] __alloc_skb+0x4b/0x230
  [<ffffffffa00b0360>] ? reset_per_cpu_data+0x160/0x160 [drop_monitor]
  [<ffffffffa00b022f>] reset_per_cpu_data+0x2f/0x160 [drop_monitor]
  [<ffffffffa00b03ab>] send_dm_alert+0x4b/0xb0 [drop_monitor]
  [<ffffffff810568e0>] process_one_work+0x130/0x4c0
  [<ffffffff81058249>] worker_thread+0x159/0x360
  [<ffffffff810580f0>] ? manage_workers.isra.27+0x240/0x240
  [<ffffffff8105d403>] kthread+0x93/0xa0
  [<ffffffff816be6d4>] kernel_thread_helper+0x4/0x10
  [<ffffffff8105d370>] ? kthread_freezable_should_stop+0x80/0x80
  [<ffffffff816be6d0>] ? gs_change+0xb/0xb

Rework the logic to call the sleeping functions in right context.

Use the standard timer/workqueue API to let the system choose any CPU to
perform the allocation and netlink send.

Also avoid a loop if reset_per_cpu_data() cannot allocate memory:
use mod_timer() to wait 1/10 second before the next try.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Reviewed-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/drop_monitor.c | 102 ++++++++++++++++--------------------------------
 1 file changed, 33 insertions(+), 69 deletions(-)

(limited to 'net')

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index ea5fb9fcc3f5..d23b6682f4e9 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -36,9 +36,6 @@
 #define TRACE_ON 1
 #define TRACE_OFF 0
 
-static void send_dm_alert(struct work_struct *unused);
-
-
 /*
  * Globals, our netlink socket pointer
  * and the work handle that will send up
@@ -48,11 +45,10 @@ static int trace_state = TRACE_OFF;
 static DEFINE_MUTEX(trace_state_mutex);
 
 struct per_cpu_dm_data {
-	struct work_struct dm_alert_work;
-	struct sk_buff __rcu *skb;
-	atomic_t dm_hit_count;
-	struct timer_list send_timer;
-	int cpu;
+	spinlock_t		lock;
+	struct sk_buff		*skb;
+	struct work_struct	dm_alert_work;
+	struct timer_list	send_timer;
 };
 
 struct dm_hw_stat_delta {
@@ -78,13 +74,13 @@ static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
 static LIST_HEAD(hw_stats_list);
 
-static void reset_per_cpu_data(struct per_cpu_dm_data *data)
+static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
 	size_t al;
 	struct net_dm_alert_msg *msg;
 	struct nlattr *nla;
 	struct sk_buff *skb;
-	struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1);
+	unsigned long flags;
 
 	al = sizeof(struct net_dm_alert_msg);
 	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
@@ -99,65 +95,40 @@ static void reset_per_cpu_data(struct per_cpu_dm_data *data)
 				  sizeof(struct net_dm_alert_msg));
 		msg = nla_data(nla);
 		memset(msg, 0, al);
-	} else
-		schedule_work_on(data->cpu, &data->dm_alert_work);
-
-	/*
-	 * Don't need to lock this, since we are guaranteed to only
-	 * run this on a single cpu at a time.
-	 * Note also that we only update data->skb if the old and new skb
-	 * pointers don't match.  This ensures that we don't continually call
-	 * synchornize_rcu if we repeatedly fail to alloc a new netlink message.
-	 */
-	if (skb != oskb) {
-		rcu_assign_pointer(data->skb, skb);
-
-		synchronize_rcu();
-
-		atomic_set(&data->dm_hit_count, dm_hit_limit);
+	} else {
+		mod_timer(&data->send_timer, jiffies + HZ / 10);
 	}
 
+	spin_lock_irqsave(&data->lock, flags);
+	swap(data->skb, skb);
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	return skb;
 }
 
-static void send_dm_alert(struct work_struct *unused)
+static void send_dm_alert(struct work_struct *work)
 {
 	struct sk_buff *skb;
-	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
+	struct per_cpu_dm_data *data;
 
-	WARN_ON_ONCE(data->cpu != smp_processor_id());
+	data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
 
-	/*
-	 * Grab the skb we're about to send
-	 */
-	skb = rcu_dereference_protected(data->skb, 1);
-
-	/*
-	 * Replace it with a new one
-	 */
-	reset_per_cpu_data(data);
+	skb = reset_per_cpu_data(data);
 
-	/*
-	 * Ship it!
-	 */
 	if (skb)
 		genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
-
-	put_cpu_var(dm_cpu_data);
 }
 
 /*
  * This is the timer function to delay the sending of an alert
  * in the event that more drops will arrive during the
- * hysteresis period.  Note that it operates under the timer interrupt
- * so we don't need to disable preemption here
+ * hysteresis period.
  */
-static void sched_send_work(unsigned long unused)
+static void sched_send_work(unsigned long _data)
 {
-	struct per_cpu_dm_data *data =  &get_cpu_var(dm_cpu_data);
-
-	schedule_work_on(smp_processor_id(), &data->dm_alert_work);
+	struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data;
 
-	put_cpu_var(dm_cpu_data);
+	schedule_work(&data->dm_alert_work);
 }
 
 static void trace_drop_common(struct sk_buff *skb, void *location)
@@ -167,33 +138,28 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 	struct nlattr *nla;
 	int i;
 	struct sk_buff *dskb;
-	struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data);
-
+	struct per_cpu_dm_data *data;
+	unsigned long flags;
 
-	rcu_read_lock();
-	dskb = rcu_dereference(data->skb);
+	local_irq_save(flags);
+	data = &__get_cpu_var(dm_cpu_data);
+	spin_lock(&data->lock);
+	dskb = data->skb;
 
 	if (!dskb)
 		goto out;
 
-	if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
-		/*
-		 * we're already at zero, discard this hit
-		 */
-		goto out;
-	}
-
 	nlh = (struct nlmsghdr *)dskb->data;
 	nla = genlmsg_data(nlmsg_data(nlh));
 	msg = nla_data(nla);
 	for (i = 0; i < msg->entries; i++) {
 		if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
 			msg->points[i].count++;
-			atomic_inc(&data->dm_hit_count);
 			goto out;
 		}
 	}
-
+	if (msg->entries == dm_hit_limit)
+		goto out;
 	/*
 	 * We need to create a new entry
 	 */
@@ -205,13 +171,11 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 
 	if (!timer_pending(&data->send_timer)) {
 		data->send_timer.expires = jiffies + dm_delay * HZ;
-		add_timer_on(&data->send_timer, smp_processor_id());
+		add_timer(&data->send_timer);
 	}
 
 out:
-	rcu_read_unlock();
-	put_cpu_var(dm_cpu_data);
-	return;
+	spin_unlock_irqrestore(&data->lock, flags);
 }
 
 static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
@@ -418,11 +382,11 @@ static int __init init_net_drop_monitor(void)
 
 	for_each_possible_cpu(cpu) {
 		data = &per_cpu(dm_cpu_data, cpu);
-		data->cpu = cpu;
 		INIT_WORK(&data->dm_alert_work, send_dm_alert);
 		init_timer(&data->send_timer);
-		data->send_timer.data = cpu;
+		data->send_timer.data = (unsigned long)data;
 		data->send_timer.function = sched_send_work;
+		spin_lock_init(&data->lock);
 		reset_per_cpu_data(data);
 	}
 
-- 
cgit v1.2.3-55-g7522


From 925e64c3c512e9f4452eaa7d52fd4c1518b8fb11 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka
Date: Wed, 16 May 2012 15:27:20 +0200
Subject: mac80211: run scan after finish connection monitoring

commit 133d40f9a22bdfd2617a446f1e3209537c5415ec
Author: Stanislaw Gruszka <sgruszka@redhat.com>
Date:   Wed Mar 28 16:01:19 2012 +0200

    mac80211: do not scan and monitor connection in parallel

added a bug which makes it possible to start a scan and never finish it,
so that every new scan request fails with an -EBUSY error. This can
happen on code paths where we finish connection monitoring and clear the
IEEE80211_STA_*_POLL flags, but do not check whether a scan was deferred.
This patch fixes those code paths.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 04c306308987..d94627c2929c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1220,6 +1220,22 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 	sdata->vif.bss_conf.qos = true;
 }
 
+static void __ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata)
+{
+	lockdep_assert_held(&sdata->local->mtx);
+
+	sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL |
+				IEEE80211_STA_BEACON_POLL);
+	ieee80211_run_deferred_scan(sdata->local);
+}
+
+static void ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata)
+{
+	mutex_lock(&sdata->local->mtx);
+	__ieee80211_stop_poll(sdata);
+	mutex_unlock(&sdata->local->mtx);
+}
+
 static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
 					   u16 capab, bool erp_valid, u8 erp)
 {
@@ -1285,8 +1301,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE;
 
 	/* just to be sure */
-	sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL |
-				IEEE80211_STA_BEACON_POLL);
+	ieee80211_stop_poll(sdata);
 
 	ieee80211_led_assoc(local, 1);
 
@@ -1456,8 +1471,7 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
 		return;
 	}
 
-	ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL |
-			  IEEE80211_STA_BEACON_POLL);
+	__ieee80211_stop_poll(sdata);
 
 	mutex_lock(&local->iflist_mtx);
 	ieee80211_recalc_ps(local, -1);
@@ -1477,7 +1491,6 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
 		  round_jiffies_up(jiffies +
 				   IEEE80211_CONNECTION_IDLE_TIME));
 out:
-	ieee80211_run_deferred_scan(local);
 	mutex_unlock(&local->mtx);
 }
 
@@ -2408,7 +2421,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 		net_dbg_ratelimited("%s: cancelling probereq poll due to a received beacon\n",
 				    sdata->name);
 #endif
+		mutex_lock(&local->mtx);
 		ifmgd->flags &= ~IEEE80211_STA_BEACON_POLL;
+		ieee80211_run_deferred_scan(local);
+		mutex_unlock(&local->mtx);
+
 		mutex_lock(&local->iflist_mtx);
 		ieee80211_recalc_ps(local, -1);
 		mutex_unlock(&local->iflist_mtx);
@@ -2595,8 +2612,7 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	u8 frame_buf[DEAUTH_DISASSOC_LEN];
 
-	ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL |
-			  IEEE80211_STA_BEACON_POLL);
+	ieee80211_stop_poll(sdata);
 
 	ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason,
 			       false, frame_buf);
@@ -2874,8 +2890,7 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
 	u32 flags;
 
 	if (sdata->vif.type == NL80211_IFTYPE_STATION) {
-		sdata->u.mgd.flags &= ~(IEEE80211_STA_BEACON_POLL |
-					IEEE80211_STA_CONNECTION_POLL);
+		__ieee80211_stop_poll(sdata);
 
 		/* let's probe the connection once */
 		flags = sdata->local->hw.flags;
@@ -2944,7 +2959,10 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
 	if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running))
 		add_timer(&ifmgd->chswitch_timer);
 	ieee80211_sta_reset_beacon_monitor(sdata);
+
+	mutex_lock(&sdata->local->mtx);
 	ieee80211_restart_sta_timer(sdata);
+	mutex_unlock(&sdata->local->mtx);
 }
 #endif
 
-- 
cgit v1.2.3-55-g7522


From 28f333666ea766fdfb25de3783ff56cd2d1c51f0 Mon Sep 17 00:00:00 2001
From: Amitkumar Karwar
Date: Tue, 29 May 2012 15:39:06 -0700
Subject: cfg80211: use sme_state in ibss start/join path

CFG80211_DEV_WARN_ON() at "net/wireless/ibss.c line 63"
is unnecessarily triggered even after a successful connection,
when cfg80211_ibss_joined() is called by the driver inside the
.join_ibss handler.

This patch fixes the problem by changing 'sme_state' in ibss path
and having WARN_ON() check for 'sme_state' similar to infra
association.

Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
Signed-off-by: Bing Zhao <bzhao@marvell.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/ibss.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index d2a19b0ff71f..89baa3328411 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -42,6 +42,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid)
 	cfg80211_hold_bss(bss_from_pub(bss));
 	wdev->current_bss = bss_from_pub(bss);
 
+	wdev->sme_state = CFG80211_SME_CONNECTED;
 	cfg80211_upload_connect_keys(wdev);
 
 	nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid,
@@ -60,7 +61,7 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp)
 	struct cfg80211_event *ev;
 	unsigned long flags;
 
-	CFG80211_DEV_WARN_ON(!wdev->ssid_len);
+	CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING);
 
 	ev = kzalloc(sizeof(*ev), gfp);
 	if (!ev)
@@ -115,9 +116,11 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
 #ifdef CONFIG_CFG80211_WEXT
 	wdev->wext.ibss.channel = params->channel;
 #endif
+	wdev->sme_state = CFG80211_SME_CONNECTING;
 	err = rdev->ops->join_ibss(&rdev->wiphy, dev, params);
 	if (err) {
 		wdev->connect_keys = NULL;
+		wdev->sme_state = CFG80211_SME_IDLE;
 		return err;
 	}
 
@@ -169,6 +172,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
 	}
 
 	wdev->current_bss = NULL;
+	wdev->sme_state = CFG80211_SME_IDLE;
 	wdev->ssid_len = 0;
 #ifdef CONFIG_CFG80211_WEXT
 	if (!nowext)
-- 
cgit v1.2.3-55-g7522


From b8bacc187aa5b59af9b9fa19b3ce4df5ad1db112 Mon Sep 17 00:00:00 2001
From: Chun-Yeow Yeoh
Date: Wed, 30 May 2012 09:30:41 +0800
Subject: mac80211: Fix Unreachable Mesh Station Problem when joining to
 another MBSS

A mesh station that joins an MBSS is reachable, via a mesh portal using
6-address frames, by mesh stations in another MBSS if the two MBSSes
are bridged. However, if the mesh station later moves into the same
MBSS as those mesh stations, it becomes unreachable by them because the
mpp_paths table is not deleted. A quick fix is to perform
mesh_path_lookup first: if a mesh path is available for the target
destination, mpp_path_lookup is not performed. When the mesh station
moves back to its original MBSS, the mesh path will be deleted once it
expires, so the station will be reachable using mpp_path_lookup again.

Signed-off-by: Chun-Yeow Yeoh <yeohchunyeow@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 847215bb2a6f..e453212fa17f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1737,7 +1737,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 	__le16 fc;
 	struct ieee80211_hdr hdr;
 	struct ieee80211s_hdr mesh_hdr __maybe_unused;
-	struct mesh_path __maybe_unused *mppath = NULL;
+	struct mesh_path __maybe_unused *mppath = NULL, *mpath = NULL;
 	const u8 *encaps_data;
 	int encaps_len, skip_header_bytes;
 	int nh_pos, h_pos;
@@ -1803,8 +1803,11 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 			goto fail;
 		}
 		rcu_read_lock();
-		if (!is_multicast_ether_addr(skb->data))
-			mppath = mpp_path_lookup(skb->data, sdata);
+		if (!is_multicast_ether_addr(skb->data)) {
+			mpath = mesh_path_lookup(skb->data, sdata);
+			if (!mpath)
+				mppath = mpp_path_lookup(skb->data, sdata);
+		}
 
 		/*
 		 * Use address extension if it is a packet from
-- 
cgit v1.2.3-55-g7522


From bd34ab62a3297bd7685da11b0cbe05ae4cd8b02c Mon Sep 17 00:00:00 2001
From: Meenakshi Venkataraman
Date: Wed, 30 May 2012 11:39:33 +0200
Subject: mac80211: fix error in station state transitions during reconfig

As part of hardware reconfig mac80211 tries
to restore the station state to its values
before the hardware reconfig, but it only
goes to the last-state - 1. Fix this
off-by-one error.

Cc: stable@kernel.org [3.4]
Signed-off-by: Meenakshi Venkataraman <meenakshi.venkataraman@intel.com>
Reviewed-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index a44c6807df01..8dd4712620ff 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1271,7 +1271,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 			enum ieee80211_sta_state state;
 
 			for (state = IEEE80211_STA_NOTEXIST;
-			     state < sta->sta_state - 1; state++)
+			     state < sta->sta_state; state++)
 				WARN_ON(drv_sta_state(local, sta->sdata, sta,
 						      state, state + 1));
 		}
-- 
cgit v1.2.3-55-g7522


From 71ecfa1893034eeb1c93e02e22ee2ad26d080858 Mon Sep 17 00:00:00 2001
From: Johannes Berg
Date: Thu, 31 May 2012 15:09:27 +0200
Subject: mac80211: clean up remain-on-channel on interface stop

When any interface goes down, it could be the one that we
were doing a remain-on-channel with. We therefore need to
cancel the remain-on-channel and flush the related work
structs so they don't run after the interface has been
removed or even destroyed.

It's also possible in this case that an off-channel SKB
was never transmitted, so free it if this is the case.
Note that this can also happen if the driver finishes
the off-channel period without ever starting it.

Cc: stable@kernel.org
Reported-by: Nirav Shah <nirav.j2.shah@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/iface.c      | 12 ++++++++++++
 net/mac80211/offchannel.c | 16 ++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d4c19a7773db..8664111d0566 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -637,6 +637,18 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		ieee80211_configure_filter(local);
 		break;
 	default:
+		mutex_lock(&local->mtx);
+		if (local->hw_roc_dev == sdata->dev &&
+		    local->hw_roc_channel) {
+			/* ignore return value since this is racy */
+			drv_cancel_remain_on_channel(local);
+			ieee80211_queue_work(&local->hw, &local->hw_roc_done);
+		}
+		mutex_unlock(&local->mtx);
+
+		flush_work(&local->hw_roc_start);
+		flush_work(&local->hw_roc_done);
+
 		flush_work(&sdata->work);
 		/*
 		 * When we get here, the interface is marked down.
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index f054e94901a2..935aa4b6deee 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -234,6 +234,22 @@ static void ieee80211_hw_roc_done(struct work_struct *work)
 		return;
 	}
 
+	/* was never transmitted */
+	if (local->hw_roc_skb) {
+		u64 cookie;
+
+		cookie = local->hw_roc_cookie ^ 2;
+
+		cfg80211_mgmt_tx_status(local->hw_roc_dev, cookie,
+					local->hw_roc_skb->data,
+					local->hw_roc_skb->len, false,
+					GFP_KERNEL);
+
+		kfree_skb(local->hw_roc_skb);
+		local->hw_roc_skb = NULL;
+		local->hw_roc_skb_for_status = NULL;
+	}
+
 	if (!local->hw_roc_for_tx)
 		cfg80211_remain_on_channel_expired(local->hw_roc_dev,
 						   local->hw_roc_cookie,
-- 
cgit v1.2.3-55-g7522


From d8c7aae64cd2db5eccc631c29fa978a24fb1feef Mon Sep 17 00:00:00 2001
From: Felix Fietkau
Date: Wed, 30 May 2012 15:32:24 +0200
Subject: mac80211: add missing rcu_read_lock/unlock in agg-rx session timer

Fixes a lockdep warning:

===================================================
[ INFO: suspicious rcu_dereference_check() usage. ]
---------------------------------------------------
net/mac80211/agg-rx.c:148 invoked rcu_dereference_check() without protection!

other info that might help us debug this:

rcu_scheduler_active = 1, debug_locks = 1
1 lock held by arecord/11226:
 #0:  (&tid_agg_rx->session_timer){+.-...}, at: [<ffffffff81066bb0>] call_timer_fn+0x0/0x360

stack backtrace:
Pid: 11226, comm: arecord Not tainted 3.1.0-kml #16
Call Trace:
 <IRQ>  [<ffffffff81093454>] lockdep_rcu_dereference+0xa4/0xc0
 [<ffffffffa02778c9>] sta_rx_agg_session_timer_expired+0xc9/0x110 [mac80211]
 [<ffffffffa0277800>] ? ieee80211_process_addba_resp+0x220/0x220 [mac80211]
 [<ffffffff81066c3a>] call_timer_fn+0x8a/0x360
 [<ffffffff81066bb0>] ? init_timer_deferrable_key+0x30/0x30
 [<ffffffff81477bb0>] ? _raw_spin_unlock_irq+0x30/0x70
 [<ffffffff81067049>] run_timer_softirq+0x139/0x310
 [<ffffffff81091d5e>] ? put_lock_stats.isra.25+0xe/0x40
 [<ffffffff810922ac>] ? lock_release_holdtime.part.26+0xdc/0x160
 [<ffffffffa0277800>] ? ieee80211_process_addba_resp+0x220/0x220 [mac80211]
 [<ffffffff8105cb78>] __do_softirq+0xc8/0x3c0
 [<ffffffff8108f088>] ? tick_dev_program_event+0x48/0x110
 [<ffffffff8108f16f>] ? tick_program_event+0x1f/0x30
 [<ffffffff81153b15>] ? putname+0x35/0x50
 [<ffffffff8147a43c>] call_softirq+0x1c/0x30
 [<ffffffff81004c55>] do_softirq+0xa5/0xe0
 [<ffffffff8105d1ee>] irq_exit+0xae/0xe0
 [<ffffffff8147ac6b>] smp_apic_timer_interrupt+0x6b/0x98
 [<ffffffff81479ab3>] apic_timer_interrupt+0x73/0x80
 <EOI>  [<ffffffff8146aac6>] ? free_debug_processing+0x1a1/0x1d5
 [<ffffffff81153b15>] ? putname+0x35/0x50
 [<ffffffff8146ab2b>] __slab_free+0x31/0x2ca
 [<ffffffff81477c3a>] ? _raw_spin_unlock_irqrestore+0x4a/0x90
 [<ffffffff81253b8f>] ? __debug_check_no_obj_freed+0x15f/0x210
 [<ffffffff81097054>] ? lock_release_nested+0x84/0xc0
 [<ffffffff8113ec55>] ? kmem_cache_free+0x105/0x250
 [<ffffffff81153b15>] ? putname+0x35/0x50
 [<ffffffff81153b15>] ? putname+0x35/0x50
 [<ffffffff8113ed8f>] kmem_cache_free+0x23f/0x250
 [<ffffffff81153b15>] putname+0x35/0x50
 [<ffffffff81146d8d>] do_sys_open+0x16d/0x1d0
 [<ffffffff81146e10>] sys_open+0x20/0x30
 [<ffffffff81478f42>] system_call_fastpath+0x16/0x1b

Reported-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/agg-rx.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 26ddb699d693..c649188314cc 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -145,15 +145,20 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
 	struct tid_ampdu_rx *tid_rx;
 	unsigned long timeout;
 
+	rcu_read_lock();
 	tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[*ptid]);
-	if (!tid_rx)
+	if (!tid_rx) {
+		rcu_read_unlock();
 		return;
+	}
 
 	timeout = tid_rx->last_rx + TU_TO_JIFFIES(tid_rx->timeout);
 	if (time_is_after_jiffies(timeout)) {
 		mod_timer(&tid_rx->session_timer, timeout);
+		rcu_read_unlock();
 		return;
 	}
+	rcu_read_unlock();
 
 #ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
-- 
cgit v1.2.3-55-g7522


From 5204267d2fd5e98fc52b44fec01ad10352642b78 Mon Sep 17 00:00:00 2001
From: Joe Perches
Date: Wed, 30 May 2012 13:25:54 -0700
Subject: mac80211: Fix likely misuse of | for &

Using | with a nonzero constant is always true.
Likely this should have been &.

cc: Ben Greear <greearb@candelatech.com>
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 495831ee48f1..e9cecca5c44d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -533,16 +533,16 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,
 		sinfo.filled = 0;
 		sta_set_sinfo(sta, &sinfo);
 
-		if (sinfo.filled | STATION_INFO_TX_BITRATE)
+		if (sinfo.filled & STATION_INFO_TX_BITRATE)
 			data[i] = 100000 *
 				cfg80211_calculate_bitrate(&sinfo.txrate);
 		i++;
-		if (sinfo.filled | STATION_INFO_RX_BITRATE)
+		if (sinfo.filled & STATION_INFO_RX_BITRATE)
 			data[i] = 100000 *
 				cfg80211_calculate_bitrate(&sinfo.rxrate);
 		i++;
 
-		if (sinfo.filled | STATION_INFO_SIGNAL_AVG)
+		if (sinfo.filled & STATION_INFO_SIGNAL_AVG)
 			data[i] = (u8)sinfo.signal_avg;
 		i++;
 	} else {
-- 
cgit v1.2.3-55-g7522


From 794454ce72a298de6f4536ade597bdcc7dcde7c7 Mon Sep 17 00:00:00 2001
From: Arik Nemtsov
Date: Sun, 3 Jun 2012 23:32:32 +0300
Subject: mac80211: fix non RCU-safe sta_list manipulation

sta_info_cleanup locks the sta_list using rcu_read_lock however
the delete operation isn't rcu safe. A race between sta_info_cleanup
timer being called and a STA being removed can occur which leads
to a panic while traversing sta_list. Fix this by switching to the
RCU-safe versions.

Cc: stable@vger.kernel.org
Reported-by: Eyal Shapira <eyal@wizery.com>
Signed-off-by: Arik Nemtsov <arik@wizery.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/sta_info.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f5b1638fbf80..de455f8bbb91 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -378,7 +378,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
 	/* make the station visible */
 	sta_info_hash_add(local, sta);
 
-	list_add(&sta->list, &local->sta_list);
+	list_add_rcu(&sta->list, &local->sta_list);
 
 	set_sta_flag(sta, WLAN_STA_INSERTED);
 
@@ -688,7 +688,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
 	if (ret)
 		return ret;
 
-	list_del(&sta->list);
+	list_del_rcu(&sta->list);
 
 	mutex_lock(&local->key_mtx);
 	for (i = 0; i < NUM_DEFAULT_KEYS; i++)
-- 
cgit v1.2.3-55-g7522


From ddcd0f41471a1e0394c8840a119ec3986a78462c Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes
Date: Thu, 31 May 2012 22:53:39 -0300
Subject: Bluetooth: Fix checking the wrong flag when accepting a socket

Most probably a typo, the check should have been for BT_SK_DEFER_SETUP
instead of BT_DEFER_SETUP (which right now only represents a socket
option).

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@openbossa.org>
Acked-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/af_bluetooth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 46e7f86acfc9..3e18af4dadc4 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -210,7 +210,7 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
 		}
 
 		if (sk->sk_state == BT_CONNECTED || !newsock ||
-		    test_bit(BT_DEFER_SETUP, &bt_sk(parent)->flags)) {
+		    test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags)) {
 			bt_accept_unlink(sk);
 			if (newsock)
 				sock_graft(sk, newsock);
-- 
cgit v1.2.3-55-g7522


From 463454b5dbd8dbab6e2fc6c557329e5b811b9c32 Mon Sep 17 00:00:00 2001
From: Johannes Berg
Date: Tue, 5 Jun 2012 12:16:50 +0200
Subject: cfg80211: fix interface combinations check

If a given interface combination doesn't contain
a required interface type then we missed checking
that and erroneously allowed it even though iface
type wasn't there at all. Add a check that makes
sure that all interface types are accounted for.

Cc: stable@kernel.org
Reported-by: Mohammed Shafi Shajakhan <mohammed@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/util.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/util.c b/net/wireless/util.c
index 55d99466babb..8f2d68fc3a44 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -935,6 +935,7 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev,
 				  enum nl80211_iftype iftype)
 {
 	struct wireless_dev *wdev_iter;
+	u32 used_iftypes = BIT(iftype);
 	int num[NUM_NL80211_IFTYPES];
 	int total = 1;
 	int i, j;
@@ -961,6 +962,7 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev,
 
 		num[wdev_iter->iftype]++;
 		total++;
+		used_iftypes |= BIT(wdev_iter->iftype);
 	}
 	mutex_unlock(&rdev->devlist_mtx);
 
@@ -970,6 +972,7 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev,
 	for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) {
 		const struct ieee80211_iface_combination *c;
 		struct ieee80211_iface_limit *limits;
+		u32 all_iftypes = 0;
 
 		c = &rdev->wiphy.iface_combinations[i];
 
@@ -984,6 +987,7 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev,
 			if (rdev->wiphy.software_iftypes & BIT(iftype))
 				continue;
 			for (j = 0; j < c->n_limits; j++) {
+				all_iftypes |= limits[j].types;
 				if (!(limits[j].types & BIT(iftype)))
 					continue;
 				if (limits[j].max < num[iftype])
@@ -991,7 +995,20 @@ int cfg80211_can_change_interface(struct cfg80211_registered_device *rdev,
 				limits[j].max -= num[iftype];
 			}
 		}
-		/* yay, it fits */
+
+		/*
+		 * Finally check that all iftypes that we're currently
+		 * using are actually part of this combination. If they
+		 * aren't then we can't use this combination and have
+		 * to continue to the next.
+		 */
+		if ((all_iftypes & used_iftypes) != used_iftypes)
+			goto cont;
+
+		/*
+		 * This combination covered all interface types and
+		 * supported the requested numbers, so we're good.
+		 */
 		kfree(limits);
 		return 0;
  cont:
-- 
cgit v1.2.3-55-g7522


From 55432d2b543a4b6dfae54f5c432a566877a85d90 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 5 Jun 2012 03:00:18 +0000
Subject: inetpeer: fix a race in inetpeer_gc_worker()

commit 5faa5df1fa2024 (inetpeer: Invalidate the inetpeer tree along with
the routing cache) added a race :

Before freeing an inetpeer, we must respect an RCU grace period, and make
sure no user will attempt to increase refcnt.

inetpeer_invalidate_tree() now waits for an RCU grace period before inserting
the inetpeer tree into gc_list and waking the worker. At that time, no
concurrent lookup can find an inetpeer in this tree.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |  5 ++++-
 net/ipv4/inetpeer.c    | 16 ++++++++++++----
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index b94765e38e80..2040bff945d4 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -40,7 +40,10 @@ struct inet_peer {
 	u32			pmtu_orig;
 	u32			pmtu_learned;
 	struct inetpeer_addr_base redirect_learned;
-	struct list_head	gc_list;
+	union {
+		struct list_head	gc_list;
+		struct rcu_head     gc_rcu;
+	};
 	/*
 	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
 	 * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index d4d61b694fab..dfba343b2509 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -560,6 +560,17 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
 }
 EXPORT_SYMBOL(inet_peer_xrlim_allow);
 
+static void inetpeer_inval_rcu(struct rcu_head *head)
+{
+	struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu);
+
+	spin_lock_bh(&gc_lock);
+	list_add_tail(&p->gc_list, &gc_list);
+	spin_unlock_bh(&gc_lock);
+
+	schedule_delayed_work(&gc_work, gc_delay);
+}
+
 void inetpeer_invalidate_tree(int family)
 {
 	struct inet_peer *old, *new, *prev;
@@ -576,10 +587,7 @@ void inetpeer_invalidate_tree(int family)
 	prev = cmpxchg(&base->root, old, new);
 	if (prev == old) {
 		base->total = 0;
-		spin_lock(&gc_lock);
-		list_add_tail(&prev->gc_list, &gc_list);
-		spin_unlock(&gc_lock);
-		schedule_delayed_work(&gc_work, gc_delay);
+		call_rcu(&prev->gc_rcu, inetpeer_inval_rcu);
 	}
 
 out:
-- 
cgit v1.2.3-55-g7522


From d1992b169d31f339dc5ea4e9f312567c8cf322a3 Mon Sep 17 00:00:00 2001
From: Hans Schillstrom
Date: Thu, 17 May 2012 22:35:46 +0000
Subject: netfilter: xt_HMARK: fix endianness and provide consistent hashing

This patch addresses two issues:

a) Fix usage of u32 and __be32 that causes endianness warnings via sparse.
b) Ensure consistent hashing in a cluster that is composed of big and
   little endian systems. Thus, we obtain the same hash mark in a
   heterogeneous cluster.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_HMARK.h |  5 +++
 net/netfilter/xt_HMARK.c           | 72 ++++++++++++++++++++++----------------
 2 files changed, 46 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/xt_HMARK.h b/include/linux/netfilter/xt_HMARK.h
index abb1650940d2..826fc5807577 100644
--- a/include/linux/netfilter/xt_HMARK.h
+++ b/include/linux/netfilter/xt_HMARK.h
@@ -27,7 +27,12 @@ union hmark_ports {
 		__u16	src;
 		__u16	dst;
 	} p16;
+	struct {
+		__be16	src;
+		__be16	dst;
+	} b16;
 	__u32	v32;
+	__be32	b32;
 };
 
 struct xt_hmark_info {
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 0a96a43108ed..1686ca1b53a1 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -32,13 +32,13 @@ MODULE_ALIAS("ipt_HMARK");
 MODULE_ALIAS("ip6t_HMARK");
 
 struct hmark_tuple {
-	u32			src;
-	u32			dst;
+	__be32			src;
+	__be32			dst;
 	union hmark_ports	uports;
-	uint8_t			proto;
+	u8			proto;
 };
 
-static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask)
+static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
 {
 	return (addr32[0] & mask[0]) ^
 	       (addr32[1] & mask[1]) ^
@@ -46,8 +46,8 @@ static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask)
 	       (addr32[3] & mask[3]);
 }
 
-static inline u32
-hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask)
+static inline __be32
+hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
 {
 	switch (l3num) {
 	case AF_INET:
@@ -58,6 +58,22 @@ hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask)
 	return 0;
 }
 
+static inline void hmark_swap_ports(union hmark_ports *uports,
+				    const struct xt_hmark_info *info)
+{
+	union hmark_ports hp;
+	u16 src, dst;
+
+	hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
+	src = ntohs(hp.b16.src);
+	dst = ntohs(hp.b16.dst);
+
+	if (dst > src)
+		uports->v32 = (dst << 16) | src;
+	else
+		uports->v32 = (src << 16) | dst;
+}
+
 static int
 hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
 		    const struct xt_hmark_info *info)
@@ -74,22 +90,19 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
 	otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 	rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
 
-	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.all,
-				 info->src_mask.all);
-	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.all,
-				 info->dst_mask.all);
+	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6,
+				 info->src_mask.ip6);
+	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6,
+				 info->dst_mask.ip6);
 
 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
 		return 0;
 
 	t->proto = nf_ct_protonum(ct);
 	if (t->proto != IPPROTO_ICMP) {
-		t->uports.p16.src = otuple->src.u.all;
-		t->uports.p16.dst = rtuple->src.u.all;
-		t->uports.v32 = (t->uports.v32 & info->port_mask.v32) |
-				info->port_set.v32;
-		if (t->uports.p16.dst < t->uports.p16.src)
-			swap(t->uports.p16.dst, t->uports.p16.src);
+		t->uports.b16.src = otuple->src.u.all;
+		t->uports.b16.dst = rtuple->src.u.all;
+		hmark_swap_ports(&t->uports, info);
 	}
 
 	return 0;
@@ -98,15 +111,19 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
 #endif
 }
 
+/* This hash function is endian independent, to ensure consistent hashing if
+ * the cluster is composed of big and little endian systems. */
 static inline u32
 hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
 {
 	u32 hash;
+	u32 src = ntohl(t->src);
+	u32 dst = ntohl(t->dst);
 
-	if (t->dst < t->src)
-		swap(t->src, t->dst);
+	if (dst < src)
+		swap(src, dst);
 
-	hash = jhash_3words(t->src, t->dst, t->uports.v32, info->hashrnd);
+	hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
 	hash = hash ^ (t->proto & info->proto_mask);
 
 	return (((u64)hash * info->hmodulus) >> 32) + info->hoffset;
@@ -126,11 +143,7 @@ hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
 	if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
 		return;
 
-	t->uports.v32 = (t->uports.v32 & info->port_mask.v32) |
-			info->port_set.v32;
-
-	if (t->uports.p16.dst < t->uports.p16.src)
-		swap(t->uports.p16.dst, t->uports.p16.src);
+	hmark_swap_ports(&t->uports, info);
 }
 
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
@@ -178,8 +191,8 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
 			return -1;
 	}
 noicmp:
-	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.all);
-	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.all);
+	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6);
+	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6);
 
 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
 		return 0;
@@ -255,11 +268,8 @@ hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
 		}
 	}
 
-	t->src = (__force u32) ip->saddr;
-	t->dst = (__force u32) ip->daddr;
-
-	t->src &= info->src_mask.ip;
-	t->dst &= info->dst_mask.ip;
+	t->src = ip->saddr & info->src_mask.ip;
+	t->dst = ip->daddr & info->dst_mask.ip;
 
 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
 		return 0;
-- 
cgit v1.2.3-55-g7522


From d109e9af61a6d2fdf33dc615ab8b724a8e75a8a4 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso
Date: Mon, 4 Jun 2012 13:31:04 +0200
Subject: netfilter: nf_ct_h323: fix bug in rtcp natting

The nat_rtp_rtcp hook takes two separate parameters port and rtp_port.

port is expected to be the real h245 address (found inside the packet).
rtp_port is the even number closest to port (RTP ports are even and
RTCP ports are odd).

However currently, both port and rtp_port have the same value (both are
rounded to the nearest even number).

This works well in case of openlogicalchannel with media (RTP/even) port.

But in case of openlogicalchannel for media control (RTCP/odd) port,
h245 address in the packet is wrongly modified to have an even port.

I am attaching a pcap demonstrating the problem, for any further analysis.

This behavior was introduced around v2.6.19 while rewriting the helper.

Signed-off-by: Jagdish Motwani <jagdish.motwani@elitecore.com>
Signed-off-by: Sanket Shah <sanket.shah@elitecore.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_h323_main.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 46d69d7f1bb4..31f50bc3a312 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -270,9 +270,8 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 		return 0;
 
 	/* RTP port is even */
-	port &= htons(~1);
-	rtp_port = port;
-	rtcp_port = htons(ntohs(port) + 1);
+	rtp_port = port & ~htons(1);
+	rtcp_port = port | htons(1);
 
 	/* Create expect for RTP */
 	if ((rtp_exp = nf_ct_expect_alloc(ct)) == NULL)
-- 
cgit v1.2.3-55-g7522


From a06998b88b1651c5f71c0e35f528bf2057188ead Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 7 Jun 2012 00:07:20 +0000
Subject: net: l2tp_eth: fix kernel panic on rmmod l2tp_eth

We must prevent module unloading if some devices are still attached to
l2tp_eth driver.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
Tested-by: Denys Fedoryshchenko <denys@visp.net.lb>
Cc: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_eth.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 443591d629ca..185f12f4a5fa 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -162,6 +162,7 @@ static void l2tp_eth_delete(struct l2tp_session *session)
 		if (dev) {
 			unregister_netdev(dev);
 			spriv->dev = NULL;
+			module_put(THIS_MODULE);
 		}
 	}
 }
@@ -249,6 +250,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
 	if (rc < 0)
 		goto out_del_dev;
 
+	__module_get(THIS_MODULE);
 	/* Must be done after register_netdev() */
 	strlcpy(session->ifname, dev->name, IFNAMSIZ);
 
-- 
cgit v1.2.3-55-g7522


From 4bd6683bd400c8b1d2ad544bb155d86a5d10f91c Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 7 Jun 2012 04:58:35 +0000
Subject: net: neighbour: fix neigh_dump_info()

Denys found out "ip neigh" output was truncated to
about 54 neighbours.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index eb09f8bbbf07..d81d026138f0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2219,9 +2219,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 	rcu_read_lock_bh();
 	nht = rcu_dereference_bh(tbl->nht);
 
-	for (h = 0; h < (1 << nht->hash_shift); h++) {
-		if (h < s_h)
-			continue;
+	for (h = s_h; h < (1 << nht->hash_shift); h++) {
 		if (h > s_h)
 			s_idx = 0;
 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
@@ -2260,9 +2258,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 
 	read_lock_bh(&tbl->lock);
 
-	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
-		if (h < s_h)
-			continue;
+	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
 		if (h > s_h)
 			s_idx = 0;
 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
@@ -2297,7 +2293,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 	struct neigh_table *tbl;
 	int t, family, s_t;
 	int proxy = 0;
-	int err = 0;
+	int err;
 
 	read_lock(&neigh_tbl_lock);
 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
@@ -2311,7 +2307,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 
 	s_t = cb->args[0];
 
-	for (tbl = neigh_tables, t = 0; tbl && (err >= 0);
+	for (tbl = neigh_tables, t = 0; tbl;
 	     tbl = tbl->next, t++) {
 		if (t < s_t || (family && tbl->family != family))
 			continue;
@@ -2322,6 +2318,8 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 			err = pneigh_dump_table(tbl, skb, cb);
 		else
 			err = neigh_dump_table(tbl, skb, cb);
+		if (err < 0)
+			break;
 	}
 	read_unlock(&neigh_tbl_lock);
 
-- 
cgit v1.2.3-55-g7522


From 8bd74516b1bd9308c17f67583134d93f777203ca Mon Sep 17 00:00:00 2001
From: Thomas Graf
Date: Thu, 7 Jun 2012 06:51:04 +0000
Subject: ipv6: fib: Restore NTF_ROUTER exception in fib6_age()

Commit 5339ab8b1dd82 (ipv6: fib: Convert fib6_age() to
dst_neigh_lookup().) seems to have mistakenly inverted the
exception for cached NTF_ROUTER routes.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 0c220a416626..74c21b924a79 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1561,7 +1561,7 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 				neigh_flags = neigh->flags;
 				neigh_release(neigh);
 			}
-			if (neigh_flags & NTF_ROUTER) {
+			if (!(neigh_flags & NTF_ROUTER)) {
 				RT6_TRACE("purging route %p via non-router but gateway\n",
 					  rt);
 				return -1;
-- 
cgit v1.2.3-55-g7522


From 278f015e9b67566991d4e831fe38e0ebbeef245e Mon Sep 17 00:00:00 2001
From: Dave Jones
Date: Wed, 6 Jun 2012 08:45:59 +0000
Subject: appletalk: Remove out of date message in printk

I accidentally triggered this printk, which amused me for a few moments.
Given we're post 2.2, we could just -EACCES, but does anyone even care about AppleTalk now?
I figure it's better to let sleeping dogs lie, and just update the message.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/appletalk/ddp.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 0301b328cf0f..86852963b7f7 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1208,9 +1208,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr,
 	if (addr->sat_addr.s_node == ATADDR_BCAST &&
 	    !sock_flag(sk, SOCK_BROADCAST)) {
 #if 1
-		printk(KERN_WARNING "%s is broken and did not set "
-				    "SO_BROADCAST. It will break when 2.2 is "
-				    "released.\n",
+		pr_warn("atalk_connect: %s is broken and did not set SO_BROADCAST.\n",
 			current->comm);
 #else
 		return -EACCES;
-- 
cgit v1.2.3-55-g7522


From 2d8dbb04c63e5369988f008bc4df3359c01d8812 Mon Sep 17 00:00:00 2001
From: Vincent Bernat
Date: Tue, 5 Jun 2012 03:41:42 +0000
Subject: snmp: fix OutOctets counter to include forwarded datagrams

RFC 4293 defines ipIfStatsOutOctets (similar definition for
ipSystemStatsOutOctets):

   The total number of octets in IP datagrams delivered to the lower
   layers for transmission.  Octets from datagrams counted in
   ipIfStatsOutTransmits MUST be counted here.

And ipIfStatsOutTransmits:

   The total number of IP datagrams that this entity supplied to the
   lower layers for transmission.  This includes datagrams generated
   locally and those forwarded by this entity.

Therefore, IPSTATS_MIB_OUTOCTETS must be incremented when incrementing
IPSTATS_MIB_OUTFORWDATAGRAMS.

IP_UPD_PO_STATS is not used since ipIfStatsOutRequests must not
include forwarded datagrams:

   The total number of IP datagrams that local IP user-protocols
   (including ICMP) supplied to IP in requests for transmission.  Note
   that this counter does not include any datagrams counted in
   ipIfStatsOutForwDatagrams.

Signed-off-by: Vincent Bernat <bernat@luffy.cx>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_forward.c | 1 +
 net/ipv4/ipmr.c       | 1 +
 net/ipv6/ip6_output.c | 1 +
 net/ipv6/ip6mr.c      | 2 ++
 4 files changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e5c44fc586ab..ab09b126423c 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -44,6 +44,7 @@ static int ip_forward_finish(struct sk_buff *skb)
 	struct ip_options *opt	= &(IPCB(skb)->opt);
 
 	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len);
 
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a9e519ad6db5..c94bbc6f2ba3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1574,6 +1574,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
 	struct ip_options *opt = &(IPCB(skb)->opt);
 
 	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len);
 
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 17b8c67998bb..decc21d19c53 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -526,6 +526,7 @@ int ip6_forward(struct sk_buff *skb)
 	hdr->hop_limit--;
 
 	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
 		       ip6_forward_finish);
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index b15dc08643a4..461e47c8e956 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1886,6 +1886,8 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb)
 {
 	IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+			 IPSTATS_MIB_OUTOCTETS, skb->len);
 	return dst_output(skb);
 }
 
-- 
cgit v1.2.3-55-g7522


From 58d1eab7ef1d7ff8e448699dfd1a21b7f3303296 Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Wed, 6 Jun 2012 23:02:55 +0200
Subject: NFC: Fix possible NULL ptr deref when getting the name of a socket

llcp_sock_getname() might get called before the LLCP socket was created.
This condition isn't checked, and llcp_sock_getname will simply deref a
NULL ptr in that case.

This exists starting with d646960 ("NFC: Initial LLCP support").

Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/nfc/llcp/sock.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c
index 3f339b19d140..17a707db40eb 100644
--- a/net/nfc/llcp/sock.c
+++ b/net/nfc/llcp/sock.c
@@ -292,6 +292,9 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *addr,
 
 	pr_debug("%p\n", sk);
 
+	if (llcp_sock == NULL)
+		return -EBADFD;
+
 	addr->sa_family = AF_NFC;
 	*len = sizeof(struct sockaddr_nfc_llcp);
 
-- 
cgit v1.2.3-55-g7522


From 6aee4ca3d2217d3f76469e5ed576d62695f0912a Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka
Date: Thu, 7 Jun 2012 14:47:21 +0200
Subject: mac80211: add back channel change flag

commit 24398e39c8ee4a9d9123eed322b859ece4d16cac
Author: Johannes Berg <johannes.berg@intel.com>
Date:   Wed Mar 28 10:58:36 2012 +0200

    mac80211: set HT channel before association

removed IEEE80211_CONF_CHANGE_CHANNEL argument from ieee80211_hw_config,
which is required by iwl4965 driver, otherwise that driver does not
configure channel properly and is not able to associate.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d94627c2929c..91d84cc77bbf 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3124,7 +3124,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
 	}
 
 	local->oper_channel = cbss->channel;
-	ieee80211_hw_config(local, 0);
+	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
 
 	if (!have_sta) {
 		u32 rates = 0, basic_rates = 0;
-- 
cgit v1.2.3-55-g7522


From 4399a4df98a63e30fd16e9d0cecc46ea92269e8f Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 8 Jun 2012 06:25:00 +0000
Subject: l2tp: fix a race in l2tp_ip_sendmsg()

Commit 081b1b1bb27f (l2tp: fix l2tp_ip_sendmsg() route handling) added
a race, in case IP route cache is disabled.

In this case, we should not do the dst_release(&rt->dst), since it'll
free the dst immediately, instead of waiting for an RCU grace period.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: James Chapman <jchapman@katalix.com>
Cc: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 70614e7affab..61d8b75d2686 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -464,10 +464,12 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 					   sk->sk_bound_dev_if);
 		if (IS_ERR(rt))
 			goto no_route;
-		if (connected)
+		if (connected) {
 			sk_setup_caps(sk, &rt->dst);
-		else
-			dst_release(&rt->dst); /* safe since we hold rcu_read_lock */
+		} else {
+			skb_dst_set(skb, &rt->dst);
+			goto xmit;
+		}
 	}
 
 	/* We dont need to clone dst here, it is guaranteed to not disappear.
@@ -475,6 +477,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 	 */
 	skb_dst_set_noref(skb, &rt->dst);
 
+xmit:
 	/* Queue the packet to IP for output */
 	rc = ip_queue_xmit(skb, &inet->cork.fl);
 	rcu_read_unlock();
-- 
cgit v1.2.3-55-g7522


From c6c4b97c6b7003e8082dd43db224c1d1f7a24aa2 Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Fri, 8 Jun 2012 14:01:44 +0000
Subject: net/core: fix kernel-doc warnings

Fix kernel-doc warnings in net/core:

Warning(net/core/skbuff.c:3368): No description found for parameter 'delta_truesize'
Warning(net/core/filter.c:628): No description found for parameter 'pfp'
Warning(net/core/filter.c:628): Excess function parameter 'sk' description in 'sk_unattached_filter_create'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 4 ++--
 net/core/skbuff.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index a3eddb515d1b..d4ce2dc712e3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -616,9 +616,9 @@ static int __sk_prepare_filter(struct sk_filter *fp)
 /**
  *	sk_unattached_filter_create - create an unattached filter
  *	@fprog: the filter program
- *	@sk: the socket to use
+ *	@pfp: the unattached filter that is created
  *
- * Create a filter independent ofr any socket. We first run some
+ * Create a filter independent of any socket. We first run some
  * sanity checks on it to make sure it does not explode on us later.
  * If an error occurs or there is insufficient memory for the filter
  * a negative errno code is returned. On success the return is zero.
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 016694d62484..d78671e9d545 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3361,7 +3361,7 @@ EXPORT_SYMBOL(kfree_skb_partial);
  * @to: prior buffer
  * @from: buffer to add
  * @fragstolen: pointer to boolean
- *
+ * @delta_truesize: how much more was allocated than was requested
  */
 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		      bool *fragstolen, int *delta_truesize)
-- 
cgit v1.2.3-55-g7522


From 5fcb08befaf57faa1b00e514915c1660252b8c26 Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Mon, 11 Jun 2012 10:18:13 -0500
Subject: 9p: BUG before corrupting memory

The BUG_ON() in pack_sg_list() would only get triggered after we've already
corrupted some memory by calling sg_set_buf() on an invalid sg buffer.

I'm still working on figuring out why I manage to trigger that bug...

Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_virtio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 5af18d11b518..2a167658bb95 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -192,10 +192,10 @@ static int pack_sg_list(struct scatterlist *sg, int start,
 		s = rest_of_page(data);
 		if (s > count)
 			s = count;
+		BUG_ON(index > limit);
 		sg_set_buf(&sg[index++], data, s);
 		count -= s;
 		data += s;
-		BUG_ON(index > limit);
 	}
 
 	return index-start;
-- 
cgit v1.2.3-55-g7522


From 92123e068efa310b09e9943ac1cfd10ff6b6d2e4 Mon Sep 17 00:00:00 2001
From: Jeff Layton
Date: Mon, 11 Jun 2012 10:03:42 -0400
Subject: rpc_pipefs: allow rpc_purge_list to take a NULL waitq pointer

In the event that we don't have a dentry for a rpc_pipefs pipe, we still
need to allow the queue_timeout job to clean out the queue. There's just
no waitq to wake up in that event.

Cc: stable@kernel.org
Reported-by: Hans de Bruin <jmdebruin@xmsnet.nl>
Reported-by: Joerg Platte <jplatte@naasa.net>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpc_pipe.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 04040476082e..21fde99e5c56 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -71,7 +71,9 @@ static void rpc_purge_list(wait_queue_head_t *waitq, struct list_head *head,
 		msg->errno = err;
 		destroy_msg(msg);
 	} while (!list_empty(head));
-	wake_up(waitq);
+
+	if (waitq)
+		wake_up(waitq);
 }
 
 static void
@@ -91,11 +93,9 @@ rpc_timeout_upcall_queue(struct work_struct *work)
 	}
 	dentry = dget(pipe->dentry);
 	spin_unlock(&pipe->lock);
-	if (dentry) {
-		rpc_purge_list(&RPC_I(dentry->d_inode)->waitq,
-			       &free_list, destroy_msg, -ETIMEDOUT);
-		dput(dentry);
-	}
+	rpc_purge_list(dentry ? &RPC_I(dentry->d_inode)->waitq : NULL,
+			&free_list, destroy_msg, -ETIMEDOUT);
+	dput(dentry);
 }
 
 ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg,
-- 
cgit v1.2.3-55-g7522


From 954fba0274058d27c7c07b5ea07c41b3b7477894 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 12 Jun 2012 19:30:21 +0000
Subject: netpoll: fix netpoll_send_udp() bugs

Bogdan Hamciuc diagnosed and fixed the following bug in netpoll_send_udp():

"skb->len += len;" instead of "skb_put(skb, len);"

Meaning that _if_ a network driver needs to call skb_realloc_headroom(),
only packet headers would be copied, leaving garbage in the payload.

However, skb_realloc_headroom() must be avoided as much as possible,
since it requires memory and netpoll tries hard to work even if memory
is exhausted (using a pool of preallocated skbs).

It appears netpoll_send_udp() reserved 16 bytes for the ethernet header,
which happens to work for typical drivers but not all.

The right thing is to use LL_RESERVED_SPACE(dev)
(and also add dev->needed_tailroom bytes of tailroom).

This patch combines both fixes.

Many thanks to Bogdan for raising this issue.

Reported-by: Bogdan Hamciuc <bogdan.hamciuc@freescale.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Bogdan Hamciuc <bogdan.hamciuc@freescale.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Neil Horman <nhorman@tuxdriver.com>
Reviewed-by: Neil Horman <nhorman@tuxdriver.com>
Reviewed-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 3d84fb9d8873..f9f40b932e4b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -362,22 +362,23 @@ EXPORT_SYMBOL(netpoll_send_skb_on_dev);
 
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 {
-	int total_len, eth_len, ip_len, udp_len;
+	int total_len, ip_len, udp_len;
 	struct sk_buff *skb;
 	struct udphdr *udph;
 	struct iphdr *iph;
 	struct ethhdr *eth;
 
 	udp_len = len + sizeof(*udph);
-	ip_len = eth_len = udp_len + sizeof(*iph);
-	total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;
+	ip_len = udp_len + sizeof(*iph);
+	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
 
-	skb = find_skb(np, total_len, total_len - len);
+	skb = find_skb(np, total_len + np->dev->needed_tailroom,
+		       total_len - len);
 	if (!skb)
 		return;
 
 	skb_copy_to_linear_data(skb, msg, len);
-	skb->len += len;
+	skb_put(skb, len);
 
 	skb_push(skb, sizeof(*udph));
 	skb_reset_transport_header(skb);
-- 
cgit v1.2.3-55-g7522


From 680584fab05efff732b5ae16ad601ba994d7b505 Mon Sep 17 00:00:00 2001
From: Alex Elder
Date: Mon, 4 Jun 2012 14:43:32 -0500
Subject: libceph: osd_client: don't drop reply reference too early

In ceph_osdc_release_request(), a reference to the r_reply message
is dropped.  But just after that, that same message is revoked if it
was in use to receive an incoming reply.  Reorder these so we are
sure we hold a reference until we're actually done with the message.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
(cherry picked from commit ab8cb34a4b2f60281a4b18b1f1ad23bc2313d91b)
---
 net/ceph/osd_client.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 1ffebed5ce0f..13538da41dd6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -139,8 +139,6 @@ void ceph_osdc_release_request(struct kref *kref)
 
 	if (req->r_request)
 		ceph_msg_put(req->r_request);
-	if (req->r_reply)
-		ceph_msg_put(req->r_reply);
 	if (req->r_con_filling_msg) {
 		dout("release_request revoking pages %p from con %p\n",
 		     req->r_pages, req->r_con_filling_msg);
@@ -148,6 +146,8 @@ void ceph_osdc_release_request(struct kref *kref)
 				      req->r_reply);
 		ceph_con_put(req->r_con_filling_msg);
 	}
+	if (req->r_reply)
+		ceph_msg_put(req->r_reply);
 	if (req->r_own_pages)
 		ceph_release_page_vector(req->r_pages,
 					 req->r_num_pages);
-- 
cgit v1.2.3-55-g7522


From 88ed6ea0b295f8e2383d599a04027ec596cdf97b Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Thu, 31 May 2012 20:22:18 -0700
Subject: libceph: use con get/put ops from osd_client

There were a few direct calls to ceph_con_{get,put}() instead of the con
ops from osd_client.c.  This is a bug since those ops aren't defined to
be ceph_con_get/put.

This breaks refcounting on the ceph_osd structs that contain the
ceph_connections, and could lead to all manner of strangeness.

The purpose of the ->get and ->put methods in a ceph connection is
to allow the connection to indicate it has a reference to something
external to the messaging system, *not* to indicate something
external has a reference to the connection.

[elder@inktank.com: added that last sentence]

Signed-off-by: Sage Weil <sage@newdream.net>
Reviewed-by: Alex Elder <elder@inktank.com>
(cherry picked from commit 0d47766f14211a73eaf54cab234db134ece79f49)
---
 net/ceph/osd_client.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 13538da41dd6..ca59e66c9787 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -144,7 +144,7 @@ void ceph_osdc_release_request(struct kref *kref)
 		     req->r_pages, req->r_con_filling_msg);
 		ceph_con_revoke_message(req->r_con_filling_msg,
 				      req->r_reply);
-		ceph_con_put(req->r_con_filling_msg);
+		req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
 	}
 	if (req->r_reply)
 		ceph_msg_put(req->r_reply);
@@ -1216,7 +1216,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 	if (req->r_con_filling_msg == con && req->r_reply == msg) {
 		dout(" dropping con_filling_msg ref %p\n", con);
 		req->r_con_filling_msg = NULL;
-		ceph_con_put(con);
+		con->ops->put(con);
 	}
 
 	if (!req->r_got_reply) {
@@ -2028,7 +2028,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
 		dout("get_reply revoking msg %p from old con %p\n",
 		     req->r_reply, req->r_con_filling_msg);
 		ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply);
-		ceph_con_put(req->r_con_filling_msg);
+		req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
 		req->r_con_filling_msg = NULL;
 	}
 
@@ -2063,7 +2063,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
 #endif
 	}
 	*skip = 0;
-	req->r_con_filling_msg = ceph_con_get(con);
+	req->r_con_filling_msg = con->ops->get(con);
 	dout("get_reply tid %lld %p\n", tid, m);
 
 out:
-- 
cgit v1.2.3-55-g7522


From b132cf4c733f91bb4dd2277ea049243cf16e8b66 Mon Sep 17 00:00:00 2001
From: Yan, Zheng
Date: Wed, 6 Jun 2012 19:35:55 -0500
Subject: rbd: Clear ceph_msg->bio_iter for retransmitted message

The bug can cause a NULL pointer dereference in write_partial_msg_pages().

Signed-off-by: Zheng Yan <zheng.z.yan@intel.com>
Reviewed-by: Alex Elder <elder@inktank.com>
(cherry picked from commit 43643528cce60ca184fe8197efa8e8da7c89a037)
---
 net/ceph/messenger.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 524f4e4f598b..b332c3d76059 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -563,6 +563,10 @@ static void prepare_write_message(struct ceph_connection *con)
 		m->hdr.seq = cpu_to_le64(++con->out_seq);
 		m->needs_out_seq = false;
 	}
+#ifdef CONFIG_BLOCK
+	else
+		m->bio_iter = NULL;
+#endif
 
 	dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
 	     m, con->out_seq, le16_to_cpu(m->hdr.type),
-- 
cgit v1.2.3-55-g7522


From 642c0dbde32f34baa7886e988a067089992adc8f Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Sun, 10 Jun 2012 20:43:56 -0700
Subject: libceph: flush msgr queue during mon_client shutdown

We need to flush the msgr workqueue during mon_client shutdown to
ensure that any work affecting our embedded ceph_connection is
finished so that we can be safely destroyed.

Previously, we were flushing the work queue after osd_client
shutdown and before mon_client shutdown to ensure that any osd
connection refs to authorizers are flushed.  Remove the redundant
flush, and document in the comment that the mon_client flush is
needed to cover that case as well.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
(cherry picked from commit f3dea7edd3d449fe7a6d402c1ce56a294b985261)
---
 net/ceph/ceph_common.c | 7 -------
 net/ceph/mon_client.c  | 8 ++++++++
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index a776f751edbf..ba4323bce0e9 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -504,13 +504,6 @@ void ceph_destroy_client(struct ceph_client *client)
 	/* unmount */
 	ceph_osdc_stop(&client->osdc);
 
-	/*
-	 * make sure osd connections close out before destroying the
-	 * auth module, which is needed to free those connections'
-	 * ceph_authorizers.
-	 */
-	ceph_msgr_flush();
-
 	ceph_monc_stop(&client->monc);
 
 	ceph_debugfs_client_cleanup(client);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 10d6008d31f2..d0649a9655be 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -847,6 +847,14 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
 
 	mutex_unlock(&monc->mutex);
 
+	/*
+	 * flush msgr queue before we destroy ourselves to ensure that:
+	 *  - any work that references our embedded con is finished.
+	 *  - any osd_client or other work that may reference an authorizer
+	 *    finishes before we shut down the auth subsystem.
+	 */
+	ceph_msgr_flush();
+
 	ceph_auth_destroy(monc->auth);
 
 	ceph_msg_put(monc->m_auth);
-- 
cgit v1.2.3-55-g7522