From bda8e7752063cdbdd1d308bc1705400a8cec1aeb Mon Sep 17 00:00:00 2001
From: Sripathi Kodi
Date: Thu, 25 Mar 2010 12:45:30 +0000
Subject: 9p: add 9P2000.L statfs operation

I made a V2 of this patch on top of my patches for VFS switches. The
change was adding v9fs_statfs pointer to v9fs_super_ops_dotl
instead of v9fs_super_ops.

statfs - get file system statistics

size[4] Tstatfs tag[2] fid[4]
size[4] Rstatfs tag[2] type[4] bsize[4] blocks[8] bfree[8] bavail[8]
                files[8] ffree[8] fsid[8] namelen[4]

The statfs message is used to request file system information returned
by the statfs(2) system call, which is used by df(1) to report file
system and disk space usage.

Signed-off-by: Jim Garlick <garlick@llnl.gov>
Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/net/9p/9p.h     | 20 ++++++++++++++++++++
 include/net/9p/client.h |  1 +
 2 files changed, 21 insertions(+)

(limited to 'include/net')

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index a7fb54808a23..59300dc22dd0 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -86,6 +86,8 @@ do { \
 
 /**
  * enum p9_msg_t - 9P message types
+ * @P9_TSTATFS: file system status request
+ * @P9_RSTATFS: file system status response
  * @P9_TVERSION: version handshake request
  * @P9_RVERSION: version handshake response
  * @P9_TAUTH: request to establish authentication channel
@@ -125,6 +127,8 @@ do { \
  */
 
 enum p9_msg_t {
+	P9_TSTATFS = 8,
+	P9_RSTATFS,
 	P9_TVERSION = 100,
 	P9_RVERSION,
 	P9_TAUTH = 102,
@@ -350,6 +354,22 @@ struct p9_wstat {
 };
 
 /* Structures for Protocol Operations */
+struct p9_tstatfs {
+	u32 fid;
+};
+
+struct p9_rstatfs {
+	u32 type;
+	u32 bsize;
+	u64 blocks;
+	u64 bfree;
+	u64 bavail;
+	u64 files;
+	u64 ffree;
+	u64 fsid;
+	u32 namelen;
+};
+
 struct p9_tversion {
 	u32 msize;
 	struct p9_str version;
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 4f3760afc20f..f83c0d99e592 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -195,6 +195,7 @@ struct p9_fid {
 	struct list_head dlist;	/* list of all fids attached to a dentry */
 };
 
+int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb);
 int p9_client_version(struct p9_client *);
 struct p9_client *p9_client_create(const char *dev_name, char *options);
 void p9_client_destroy(struct p9_client *clnt);
-- 
cgit v1.2.3-55-g7522


From 4681dbdacb5cdc4d3273c3a97a1858d6e00a5fe7 Mon Sep 17 00:00:00 2001
From: Sripathi Kodi
Date: Thu, 25 Mar 2010 12:47:26 +0000
Subject: 9p: add 9P2000.L rename operation

I made a V2 of this patch on top of my patches for VFS switches.
All the changes were due to change in some offsets.

rename - change name of file or directory

size[4] Trename tag[2] fid[4] newdirfid[4] name[s]
size[4] Rrename tag[2]

The rename message is used to change the name of a file, possibly moving it
to a new directory.  The 9P wstat message can only rename a file within the
same directory.

Signed-off-by: Jim Garlick <garlick@llnl.gov>
Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c       |  7 +++++++
 include/net/9p/9p.h     | 13 +++++++++++++
 include/net/9p/client.h |  1 +
 net/9p/client.c         | 28 ++++++++++++++++++++++++++++
 4 files changed, 49 insertions(+)

(limited to 'include/net')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 13a8ed6088b8..de9a39590b70 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -794,6 +794,13 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto clunk_olddir;
 	}
 
+	if (v9fs_proto_dotl(v9ses)) {
+		retval = p9_client_rename(oldfid, newdirfid,
+					(char *) new_dentry->d_name.name);
+		if (retval != -ENOSYS)
+			goto clunk_newdir;
+	}
+
 	/* 9P can only handle file rename in the same directory */
 	if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
 		P9_DPRINTK(P9_DEBUG_ERROR,
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 59300dc22dd0..156c26bb8bd7 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -88,6 +88,8 @@ do { \
  * enum p9_msg_t - 9P message types
  * @P9_TSTATFS: file system status request
  * @P9_RSTATFS: file system status response
+ * @P9_TRENAME: rename request
+ * @P9_RRENAME: rename response
  * @P9_TVERSION: version handshake request
  * @P9_RVERSION: version handshake response
  * @P9_TAUTH: request to establish authentication channel
@@ -129,6 +131,8 @@ do { \
 enum p9_msg_t {
 	P9_TSTATFS = 8,
 	P9_RSTATFS,
+	P9_TRENAME = 20,
+	P9_RRENAME,
 	P9_TVERSION = 100,
 	P9_RVERSION,
 	P9_TAUTH = 102,
@@ -370,6 +374,15 @@ struct p9_rstatfs {
 	u32 namelen;
 };
 
+struct p9_trename {
+	u32 fid;
+	u32 newdirfid;
+	struct p9_str name;
+};
+
+struct p9_rrename {
+};
+
 struct p9_tversion {
 	u32 msize;
 	struct p9_str version;
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index f83c0d99e592..7dd3ed85c782 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -196,6 +196,7 @@ struct p9_fid {
 };
 
 int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb);
+int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name);
 int p9_client_version(struct p9_client *);
 struct p9_client *p9_client_create(const char *dev_name, char *options);
 void p9_client_destroy(struct p9_client *clnt);
diff --git a/net/9p/client.c b/net/9p/client.c
index 430a1c4a7c6f..37c8da07a80b 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1404,3 +1404,31 @@ error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_statfs);
+
+int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name)
+{
+	int err;
+	struct p9_req_t *req;
+	struct p9_client *clnt;
+
+	err = 0;
+	clnt = fid->clnt;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
+			fid->fid, newdirfid->fid, name);
+
+	req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid,
+			newdirfid->fid, name);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);
+
+	p9_free_req(clnt, req);
+error:
+	return err;
+}
+EXPORT_SYMBOL(p9_client_rename);
+
-- 
cgit v1.2.3-55-g7522


From 7aecf4944f2c05aafb73b4820e469c74b4ec8517 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland
Date: Fri, 21 May 2010 02:16:08 +0000
Subject: caif: Bugfix - use standard Linux lists

Discovered bug when running high number of parallel connect requests.
Replace buggy home brewed list with linux/list.h.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/cfctrl.h |  4 +--
 net/caif/cfctrl.c         | 92 ++++++++++++++---------------------------------
 2 files changed, 28 insertions(+), 68 deletions(-)

(limited to 'include/net')

diff --git a/include/net/caif/cfctrl.h b/include/net/caif/cfctrl.h
index 997603f2bf4c..9402543fc20d 100644
--- a/include/net/caif/cfctrl.h
+++ b/include/net/caif/cfctrl.h
@@ -94,8 +94,8 @@ struct cfctrl_request_info {
 	enum cfctrl_cmd cmd;
 	u8 channel_id;
 	struct cfctrl_link_param param;
-	struct cfctrl_request_info *next;
 	struct cflayer *client_layer;
+	struct list_head list;
 };
 
 struct cfctrl {
@@ -103,7 +103,7 @@ struct cfctrl {
 	struct cfctrl_rsp res;
 	atomic_t req_seq_no;
 	atomic_t rsp_seq_no;
-	struct cfctrl_request_info *first_req;
+	struct list_head list;
 	/* Protects from simultaneous access to first_req list */
 	spinlock_t info_list_lock;
 #ifndef CAIF_NO_LOOP
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 0ffe1e1ce901..fcfda98a5e6d 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -44,13 +44,14 @@ struct cflayer *cfctrl_create(void)
 	dev_info.id = 0xff;
 	memset(this, 0, sizeof(*this));
 	cfsrvl_init(&this->serv, 0, &dev_info);
-	spin_lock_init(&this->info_list_lock);
 	atomic_set(&this->req_seq_no, 1);
 	atomic_set(&this->rsp_seq_no, 1);
 	this->serv.layer.receive = cfctrl_recv;
 	sprintf(this->serv.layer.name, "ctrl");
 	this->serv.layer.ctrlcmd = cfctrl_ctrlcmd;
 	spin_lock_init(&this->loop_linkid_lock);
+	spin_lock_init(&this->info_list_lock);
+	INIT_LIST_HEAD(&this->list);
 	this->loop_linkid = 1;
 	return &this->serv.layer;
 }
@@ -112,20 +113,10 @@ bool cfctrl_req_eq(struct cfctrl_request_info *r1,
 void cfctrl_insert_req(struct cfctrl *ctrl,
 			      struct cfctrl_request_info *req)
 {
-	struct cfctrl_request_info *p;
 	spin_lock(&ctrl->info_list_lock);
-	req->next = NULL;
 	atomic_inc(&ctrl->req_seq_no);
 	req->sequence_no = atomic_read(&ctrl->req_seq_no);
-	if (ctrl->first_req == NULL) {
-		ctrl->first_req = req;
-		spin_unlock(&ctrl->info_list_lock);
-		return;
-	}
-	p = ctrl->first_req;
-	while (p->next != NULL)
-		p = p->next;
-	p->next = req;
+	list_add_tail(&req->list, &ctrl->list);
 	spin_unlock(&ctrl->info_list_lock);
 }
 
@@ -133,46 +124,28 @@ void cfctrl_insert_req(struct cfctrl *ctrl,
 struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
 					      struct cfctrl_request_info *req)
 {
-	struct cfctrl_request_info *p;
-	struct cfctrl_request_info *ret;
+	struct cfctrl_request_info *p, *tmp, *first;
 
 	spin_lock(&ctrl->info_list_lock);
-	if (ctrl->first_req == NULL) {
-		spin_unlock(&ctrl->info_list_lock);
-		return NULL;
-	}
-
-	if (cfctrl_req_eq(req, ctrl->first_req)) {
-		ret = ctrl->first_req;
-		caif_assert(ctrl->first_req);
-		atomic_set(&ctrl->rsp_seq_no,
-				 ctrl->first_req->sequence_no);
-		ctrl->first_req = ctrl->first_req->next;
-		spin_unlock(&ctrl->info_list_lock);
-		return ret;
-	}
+	first = list_first_entry(&ctrl->list, struct cfctrl_request_info, list);
 
-	p = ctrl->first_req;
-
-	while (p->next != NULL) {
-		if (cfctrl_req_eq(req, p->next)) {
-			pr_warning("CAIF: %s(): Requests are not "
+	list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
+		if (cfctrl_req_eq(req, p)) {
+			if (p != first)
+				pr_warning("CAIF: %s(): Requests are not "
 					"received in order\n",
 					__func__);
-			ret = p->next;
+
 			atomic_set(&ctrl->rsp_seq_no,
-					p->next->sequence_no);
-			p->next = p->next->next;
-			spin_unlock(&ctrl->info_list_lock);
-			return ret;
+					 p->sequence_no);
+			list_del(&p->list);
+			goto out;
 		}
-		p = p->next;
 	}
+	p = NULL;
+out:
 	spin_unlock(&ctrl->info_list_lock);
-
-	pr_warning("CAIF: %s(): Request does not match\n",
-		   __func__);
-	return NULL;
+	return p;
 }
 
 struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer)
@@ -388,31 +361,18 @@ void cfctrl_getstartreason_req(struct cflayer *layer)
 
 void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
 {
-	struct cfctrl_request_info *p, *req;
+	struct cfctrl_request_info *p, *tmp;
 	struct cfctrl *ctrl = container_obj(layr);
 	spin_lock(&ctrl->info_list_lock);
-
-	if (ctrl->first_req == NULL) {
-		spin_unlock(&ctrl->info_list_lock);
-		return;
-	}
-
-	if (ctrl->first_req->client_layer == adap_layer) {
-
-		req = ctrl->first_req;
-		ctrl->first_req = ctrl->first_req->next;
-		kfree(req);
-	}
-
-	p = ctrl->first_req;
-	while (p != NULL && p->next != NULL) {
-		if (p->next->client_layer == adap_layer) {
-
-			req = p->next;
-			p->next = p->next->next;
-			kfree(p->next);
+	pr_warning("CAIF: %s(): enter\n", __func__);
+
+	list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
+		if (p->client_layer == adap_layer) {
+			pr_warning("CAIF: %s(): cancel req :%d\n", __func__,
+					p->sequence_no);
+			list_del(&p->list);
+			kfree(p);
 		}
-		p = p->next;
 	}
 
 	spin_unlock(&ctrl->info_list_lock);
@@ -634,7 +594,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 	case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:
 	case CAIF_CTRLCMD_FLOW_OFF_IND:
 		spin_lock(&this->info_list_lock);
-		if (this->first_req != NULL) {
+		if (!list_empty(&this->list)) {
 			pr_debug("CAIF: %s(): Received flow off in "
 				   "control layer", __func__);
 		}
-- 
cgit v1.2.3-55-g7522


From f845172531fb7410c7fb7780b1a6e51ee6df7d52 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Mon, 24 May 2010 00:12:34 -0700
Subject: cls_cgroup: Store classid in struct sock

Up until now cls_cgroup has relied on fetching the classid out of
the current executing thread.  This runs into trouble when a packet
processing is delayed in which case it may execute out of another
thread's context.

Furthermore, even when a packet is not delayed we may fail to
classify it if soft IRQs have been disabled, because this scenario
is indistinguishable from one where a packet unrelated to the
current thread is processed by a real soft IRQ.

In fact, the current semantics is inherently broken, as a single
skb may be constructed out of the writes of two different tasks.
A different manifestation of this problem is when the TCP stack
transmits in response of an incoming ACK.  This is currently
unclassified.

As we already have a concept of packet ownership for accounting
purposes in the skb->sk pointer, this is a natural place to store
the classid in a persistent manner.

This patch adds the cls_cgroup classid in struct sock, filling up
an existing hole on 64-bit :)

The value is set at socket creation time.  So all sockets created
via socket(2) automatically gains the ID of the thread creating it.
Whenever another process touches the socket by either reading or
writing to it, we will change the socket classid to that of the
process if it has a valid (non-zero) classid.

For sockets created on inbound connections through accept(2), we
inherit the classid of the original listening socket through
sk_clone, possibly preceding the actual accept(2) call.

In order to minimise risks, I have not made this the authoritative
classid.  For now it is only used as a backup when we execute
with soft IRQs disabled.  Once we're completely happy with its
semantics we can use it as the sole classid.

Footnote: I have rearranged the error path on cls_group module
creation.  If we didn't do this, then there is a window where
someone could create a tc rule using cls_group before the cgroup
subsystem has been registered.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/cls_cgroup.h | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/net/sock.h       | 10 +++++++-
 net/core/sock.c          | 18 ++++++++++++++
 net/sched/cls_cgroup.c   | 50 ++++++++++++++++++++++++++------------
 net/socket.c             |  9 +++++++
 5 files changed, 133 insertions(+), 17 deletions(-)
 create mode 100644 include/net/cls_cgroup.h

(limited to 'include/net')

diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
new file mode 100644
index 000000000000..ef2df1475b51
--- /dev/null
+++ b/include/net/cls_cgroup.h
@@ -0,0 +1,63 @@
+/*
+ * cls_cgroup.h			Control Group Classifier
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _NET_CLS_CGROUP_H
+#define _NET_CLS_CGROUP_H
+
+#include <linux/cgroup.h>
+#include <linux/hardirq.h>
+#include <linux/rcupdate.h>
+
+#ifdef CONFIG_CGROUPS
+struct cgroup_cls_state
+{
+	struct cgroup_subsys_state css;
+	u32 classid;
+};
+
+#ifdef CONFIG_NET_CLS_CGROUP
+static inline u32 task_cls_classid(struct task_struct *p)
+{
+	if (in_interrupt())
+		return 0;
+
+	return container_of(task_subsys_state(p, net_cls_subsys_id),
+			    struct cgroup_cls_state, css).classid;
+}
+#else
+extern int net_cls_subsys_id;
+
+static inline u32 task_cls_classid(struct task_struct *p)
+{
+	int id;
+	u32 classid;
+
+	if (in_interrupt())
+		return 0;
+
+	rcu_read_lock();
+	id = rcu_dereference(net_cls_subsys_id);
+	if (id >= 0)
+		classid = container_of(task_subsys_state(p, id),
+				       struct cgroup_cls_state, css)->classid;
+	rcu_read_unlock();
+
+	return classid;
+}
+#endif
+#else
+static inline u32 task_cls_classid(struct task_struct *p)
+{
+	return 0;
+}
+#endif
+#endif  /* _NET_CLS_CGROUP_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 5697caf8cc76..d24f382cb712 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -312,7 +312,7 @@ struct sock {
 	void			*sk_security;
 #endif
 	__u32			sk_mark;
-	/* XXX 4 bytes hole on 64 bit */
+	u32			sk_classid;
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk, int bytes);
 	void			(*sk_write_space)(struct sock *sk);
@@ -1074,6 +1074,14 @@ extern void *sock_kmalloc(struct sock *sk, int size,
 extern void sock_kfree_s(struct sock *sk, void *mem, int size);
 extern void sk_send_sigurg(struct sock *sk);
 
+#ifdef CONFIG_CGROUPS
+extern void sock_update_classid(struct sock *sk);
+#else
+static inline void sock_update_classid(struct sock *sk)
+{
+}
+#endif
+
 /*
  * Functions to fill in entries in struct proto_ops when a protocol
  * does not implement a particular function.
diff --git a/net/core/sock.c b/net/core/sock.c
index bf88a167c8f2..a05ae7f9771e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -123,6 +123,7 @@
 #include <linux/net_tstamp.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
+#include <net/cls_cgroup.h>
 
 #include <linux/filter.h>
 
@@ -217,6 +218,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
+#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+int net_cls_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_cls_subsys_id);
+#endif
+
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
 	struct timeval tv;
@@ -1050,6 +1056,16 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
 	module_put(owner);
 }
 
+#ifdef CONFIG_CGROUPS
+void sock_update_classid(struct sock *sk)
+{
+	u32 classid = task_cls_classid(current);
+
+	if (classid && classid != sk->sk_classid)
+		sk->sk_classid = classid;
+}
+#endif
+
 /**
  *	sk_alloc - All socket objects are allocated here
  *	@net: the applicable net namespace
@@ -1073,6 +1089,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		sock_lock_init(sk);
 		sock_net_set(sk, get_net(net));
 		atomic_set(&sk->sk_wmem_alloc, 1);
+
+		sock_update_classid(sk);
 	}
 
 	return sk;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 221180384fd7..78ef2c5e130b 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -16,14 +16,11 @@
 #include <linux/errno.h>
 #include <linux/skbuff.h>
 #include <linux/cgroup.h>
+#include <linux/rcupdate.h>
 #include <net/rtnetlink.h>
 #include <net/pkt_cls.h>
-
-struct cgroup_cls_state
-{
-	struct cgroup_subsys_state css;
-	u32 classid;
-};
+#include <net/sock.h>
+#include <net/cls_cgroup.h>
 
 static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
 					       struct cgroup *cgrp);
@@ -112,6 +109,10 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
 	struct cls_cgroup_head *head = tp->root;
 	u32 classid;
 
+	rcu_read_lock();
+	classid = task_cls_state(current)->classid;
+	rcu_read_unlock();
+
 	/*
 	 * Due to the nature of the classifier it is required to ignore all
 	 * packets originating from softirq context as accessing `current'
@@ -122,12 +123,12 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
 	 * calls by looking at the number of nested bh disable calls because
 	 * softirqs always disables bh.
 	 */
-	if (softirq_count() != SOFTIRQ_OFFSET)
-		return -1;
-
-	rcu_read_lock();
-	classid = task_cls_state(current)->classid;
-	rcu_read_unlock();
+	if (softirq_count() != SOFTIRQ_OFFSET) {
+		/* If there is an sk_classid we'll use that. */
+		if (!skb->sk)
+			return -1;
+		classid = skb->sk->sk_classid;
+	}
 
 	if (!classid)
 		return -1;
@@ -289,18 +290,35 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
 
 static int __init init_cgroup_cls(void)
 {
-	int ret = register_tcf_proto_ops(&cls_cgroup_ops);
-	if (ret)
-		return ret;
+	int ret;
+
 	ret = cgroup_load_subsys(&net_cls_subsys);
 	if (ret)
-		unregister_tcf_proto_ops(&cls_cgroup_ops);
+		goto out;
+
+#ifndef CONFIG_NET_CLS_CGROUP
+	/* We can't use rcu_assign_pointer because this is an int. */
+	smp_wmb();
+	net_cls_subsys_id = net_cls_subsys.subsys_id;
+#endif
+
+	ret = register_tcf_proto_ops(&cls_cgroup_ops);
+	if (ret)
+		cgroup_unload_subsys(&net_cls_subsys);
+
+out:
 	return ret;
 }
 
 static void __exit exit_cgroup_cls(void)
 {
 	unregister_tcf_proto_ops(&cls_cgroup_ops);
+
+#ifndef CONFIG_NET_CLS_CGROUP
+	net_cls_subsys_id = -1;
+	synchronize_rcu();
+#endif
+
 	cgroup_unload_subsys(&net_cls_subsys);
 }
 
diff --git a/net/socket.c b/net/socket.c
index f9f7d0872cac..367d5477d00f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -94,6 +94,7 @@
 
 #include <net/compat.h>
 #include <net/wext.h>
+#include <net/cls_cgroup.h>
 
 #include <net/sock.h>
 #include <linux/netfilter.h>
@@ -558,6 +559,8 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	struct sock_iocb *si = kiocb_to_siocb(iocb);
 	int err;
 
+	sock_update_classid(sock->sk);
+
 	si->sock = sock;
 	si->scm = NULL;
 	si->msg = msg;
@@ -684,6 +687,8 @@ static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
 {
 	struct sock_iocb *si = kiocb_to_siocb(iocb);
 
+	sock_update_classid(sock->sk);
+
 	si->sock = sock;
 	si->scm = NULL;
 	si->msg = msg;
@@ -777,6 +782,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 	if (unlikely(!sock->ops->splice_read))
 		return -EINVAL;
 
+	sock_update_classid(sock->sk);
+
 	return sock->ops->splice_read(sock, ppos, pipe, len, flags);
 }
 
@@ -3069,6 +3076,8 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 		    size_t size, int flags)
 {
+	sock_update_classid(sock->sk);
+
 	if (sock->ops->sendpage)
 		return sock->ops->sendpage(sock, page, offset, size, flags);
 
-- 
cgit v1.2.3-55-g7522


From a69eee4988752c7196677958b4ed8f4c2b28499a Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Mon, 24 May 2010 07:45:43 -0700
Subject: Revert "ath9k: Group Key fix for VAPs"

This reverts commit 03ceedea972a82d343fa5c2528b3952fa9e615d5, since it
breaks resume from suspend-to-ram on Rafael's Acer Ferrari One.
NetworkManager thinks everything is ok, but it can't connect to the AP
to get an IP address after the resume.

In fact, it even breaks resume for non-ath9k chipsets: reverting it also
fixes Rafael's Toshiba Protege R500 with the iwlagn driver.  As Johannes
says:

  "Indeed, this patch needs to be reverted. That mac80211 change is wrong
   and completely unnecessary."

Reported-and-requested-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Daniel Yingqiang Ma <yma.cool@gmail.com>
Cc: John W. Linville <linville@tuxdriver.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/net/wireless/ath/ath9k/main.c | 28 +++-------------------------
 include/net/mac80211.h                |  1 -
 net/mac80211/key.c                    |  1 -
 3 files changed, 3 insertions(+), 27 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 893b552981a0..abfa0493236f 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -752,7 +752,6 @@ static int ath_key_config(struct ath_common *common,
 	struct ath_hw *ah = common->ah;
 	struct ath9k_keyval hk;
 	const u8 *mac = NULL;
-	u8 gmac[ETH_ALEN];
 	int ret = 0;
 	int idx;
 
@@ -776,30 +775,9 @@ static int ath_key_config(struct ath_common *common,
 	memcpy(hk.kv_val, key->key, key->keylen);
 
 	if (!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
-
-		if (key->ap_addr) {
-			/*
-			 * Group keys on hardware that supports multicast frame
-			 * key search use a mac that is the sender's address with
-			 * the high bit set instead of the app-specified address.
-			 */
-			memcpy(gmac, key->ap_addr, ETH_ALEN);
-			gmac[0] |= 0x80;
-			mac = gmac;
-
-			if (key->alg == ALG_TKIP)
-				idx = ath_reserve_key_cache_slot_tkip(common);
-			else
-				idx = ath_reserve_key_cache_slot(common);
-			if (idx < 0)
-				mac = NULL; /* no free key cache entries */
-		}
-
-		if (!mac) {
-			/* For now, use the default keys for broadcast keys. This may
-			 * need to change with virtual interfaces. */
-			idx = key->keyidx;
-		}
+		/* For now, use the default keys for broadcast keys. This may
+		 * need to change with virtual interfaces. */
+		idx = key->keyidx;
 	} else if (key->keyidx) {
 		if (WARN_ON(!sta))
 			return -EOPNOTSUPP;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 5be900d19660..e24b0363e6cb 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -831,7 +831,6 @@ struct ieee80211_key_conf {
 	u8 iv_len;
 	u8 hw_key_idx;
 	u8 flags;
-	u8 *ap_addr;
 	s8 keyidx;
 	u8 keylen;
 	u8 key[0];
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 8d4b41787dcf..e8f6e3b252d8 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -140,7 +140,6 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 				     struct ieee80211_sub_if_data,
 				     u.ap);
 
-	key->conf.ap_addr = sdata->dev->dev_addr;
 	ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf);
 
 	if (!ret) {
-- 
cgit v1.2.3-55-g7522


From 937eada45fa7bd233dfa59bbd8c0b436b02b6491 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Mon, 24 May 2010 23:53:37 -0700
Subject: cls_cgroup: Fix build error when built-in

There is a typo in cgroup_cls_state when cls_cgroup is built-in.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/cls_cgroup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index ef2df1475b51..6cf44866cecd 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -31,7 +31,7 @@ static inline u32 task_cls_classid(struct task_struct *p)
 		return 0;
 
 	return container_of(task_subsys_state(p, net_cls_subsys_id),
-			    struct cgroup_cls_state, css).classid;
+			    struct cgroup_cls_state, css)->classid;
 }
 #else
 extern int net_cls_subsys_id;
-- 
cgit v1.2.3-55-g7522


From acfbe96a3035639619a6533e04d88ed4ef9ccb61 Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Mon, 24 May 2010 23:54:18 -0700
Subject: sock.h: fix kernel-doc warning

Fix sock.h kernel-doc warning:
Warning(include/net/sock.h:1438): No description found for parameter 'wq'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index d24f382cb712..d2a71b04a5ae 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1412,7 +1412,7 @@ static inline int sk_has_allocations(const struct sock *sk)
 
 /**
  * wq_has_sleeper - check if there are any waiting processes
- * @sk: struct socket_wq
+ * @wq: struct socket_wq
  *
  * Returns true if socket_wq has waiting processes
  *
-- 
cgit v1.2.3-55-g7522


From 4be929be34f9bdeffa40d815d32d7d60d2c7f03b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan
Date: Mon, 24 May 2010 14:33:03 -0700
Subject: kernel-wide: replace USHORT_MAX, SHORT_MAX and SHORT_MIN with
 USHRT_MAX, SHRT_MAX and SHRT_MIN

- C99 knows about USHRT_MAX/SHRT_MAX/SHRT_MIN, not
  USHORT_MAX/SHORT_MAX/SHORT_MIN.

- Make SHRT_MIN of type s16, not int, for consistency.

[akpm@linux-foundation.org: fix drivers/dma/timb_dma.c]
[akpm@linux-foundation.org: fix security/keys/keyring.c]
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/dma/timb_dma.c                |  2 +-
 drivers/scsi/fcoe/fcoe.c              |  2 +-
 drivers/scsi/mpt2sas/mpt2sas_base.c   |  2 +-
 drivers/scsi/mpt2sas/mpt2sas_config.c |  2 +-
 drivers/vhost/vhost.c                 |  2 +-
 fs/nfs/super.c                        |  4 ++--
 fs/nfsd/nfsctl.c                      |  4 ++--
 fs/ocfs2/blockcheck.c                 |  4 ++--
 include/linux/kernel.h                |  6 +++---
 include/linux/sched.h                 |  2 +-
 include/net/ip.h                      |  6 +++---
 include/net/ipv6.h                    |  6 +++---
 ipc/msg.c                             | 12 ++++++------
 ipc/util.c                            |  4 ++--
 lib/vsprintf.c                        |  2 +-
 net/9p/protocol.c                     |  2 +-
 net/dccp/options.c                    |  2 +-
 net/ipv4/udp.c                        |  8 ++++----
 net/mac80211/sta_info.c               |  2 +-
 net/sunrpc/rpcb_clnt.c                |  2 +-
 security/keys/keyring.c               |  6 +++---
 21 files changed, 41 insertions(+), 41 deletions(-)

(limited to 'include/net')

diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index 0172fa3c7a2b..a1bf77c1993f 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -188,7 +188,7 @@ static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single)
 static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc,
 	struct scatterlist *sg, bool last)
 {
-	if (sg_dma_len(sg) > USHORT_MAX) {
+	if (sg_dma_len(sg) > USHRT_MAX) {
 		dev_err(chan2dev(&td_chan->chan), "Too big sg element\n");
 		return -EINVAL;
 	}
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 9276121db1ef..44a07593de56 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -688,7 +688,7 @@ static int fcoe_shost_config(struct fc_lport *lport, struct device *dev)
 	}
 
 	if (!lport->vport)
-		fc_host_max_npiv_vports(lport->host) = USHORT_MAX;
+		fc_host_max_npiv_vports(lport->host) = USHRT_MAX;
 
 	snprintf(fc_host_symbolic_name(lport->host), FC_SYMBOLIC_NAME_SIZE,
 		 "%s v%s over %s", FCOE_NAME, FCOE_VERSION,
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index b830d61684dd..0ec1ed389c20 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
@@ -3757,7 +3757,7 @@ _base_reset_handler(struct MPT2SAS_ADAPTER *ioc, int reset_phase)
 		if (ioc->config_cmds.status & MPT2_CMD_PENDING) {
 			ioc->config_cmds.status |= MPT2_CMD_RESET;
 			mpt2sas_base_free_smid(ioc, ioc->config_cmds.smid);
-			ioc->config_cmds.smid = USHORT_MAX;
+			ioc->config_cmds.smid = USHRT_MAX;
 			complete(&ioc->config_cmds.done);
 		}
 		break;
diff --git a/drivers/scsi/mpt2sas/mpt2sas_config.c b/drivers/scsi/mpt2sas/mpt2sas_config.c
index e762dd3e2fcb..c65442982d7b 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_config.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_config.c
@@ -258,7 +258,7 @@ mpt2sas_config_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
 #ifdef CONFIG_SCSI_MPT2SAS_LOGGING
 	_config_display_some_debug(ioc, smid, "config_done", mpi_reply);
 #endif
-	ioc->config_cmds.smid = USHORT_MAX;
+	ioc->config_cmds.smid = USHRT_MAX;
 	complete(&ioc->config_cmds.done);
 	return 1;
 }
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 750effe0f98b..c6fb8e968f21 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -806,7 +806,7 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 	count = indirect->len / sizeof desc;
 	/* Buffers are chained via a 16 bit next field, so
 	 * we can have at most 2^16 of these. */
-	if (count > USHORT_MAX + 1) {
+	if (count > USHRT_MAX + 1) {
 		vq_err(vq, "Indirect buffer length too big: %d\n",
 		       indirect->len);
 		return -E2BIG;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2f8b1157daa2..04214fc5c304 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1060,7 +1060,7 @@ static int nfs_parse_mount_options(char *raw,
 				goto out_nomem;
 			rc = strict_strtoul(string, 10, &option);
 			kfree(string);
-			if (rc != 0 || option > USHORT_MAX)
+			if (rc != 0 || option > USHRT_MAX)
 				goto out_invalid_value;
 			mnt->nfs_server.port = option;
 			break;
@@ -1181,7 +1181,7 @@ static int nfs_parse_mount_options(char *raw,
 				goto out_nomem;
 			rc = strict_strtoul(string, 10, &option);
 			kfree(string);
-			if (rc != 0 || option > USHORT_MAX)
+			if (rc != 0 || option > USHRT_MAX)
 				goto out_invalid_value;
 			mnt->mount_server.port = option;
 			break;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index bc3194ea01f5..508941c23af7 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -998,7 +998,7 @@ static ssize_t __write_ports_addxprt(char *buf)
 	if (sscanf(buf, "%15s %4u", transport, &port) != 2)
 		return -EINVAL;
 
-	if (port < 1 || port > USHORT_MAX)
+	if (port < 1 || port > USHRT_MAX)
 		return -EINVAL;
 
 	err = nfsd_create_serv();
@@ -1040,7 +1040,7 @@ static ssize_t __write_ports_delxprt(char *buf)
 	if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2)
 		return -EINVAL;
 
-	if (port < 1 || port > USHORT_MAX || nfsd_serv == NULL)
+	if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL)
 		return -EINVAL;
 
 	xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port);
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index b7428c5d0d3b..ec6d12339593 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -403,7 +403,7 @@ void ocfs2_block_check_compute(void *data, size_t blocksize,
 	 * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
 	 * larger than 16 bits.
 	 */
-	BUG_ON(ecc > USHORT_MAX);
+	BUG_ON(ecc > USHRT_MAX);
 
 	bc->bc_crc32e = cpu_to_le32(crc);
 	bc->bc_ecc = cpu_to_le16((u16)ecc);
@@ -508,7 +508,7 @@ void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
 	 * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
 	 * larger than 16 bits.
 	 */
-	BUG_ON(ecc > USHORT_MAX);
+	BUG_ON(ecc > USHRT_MAX);
 
 	bc->bc_crc32e = cpu_to_le32(crc);
 	bc->bc_ecc = cpu_to_le16((u16)ecc);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index cc5e3ffe9fce..a2e7c32e17e7 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -24,9 +24,9 @@
 extern const char linux_banner[];
 extern const char linux_proc_banner[];
 
-#define USHORT_MAX	((u16)(~0U))
-#define SHORT_MAX	((s16)(USHORT_MAX>>1))
-#define SHORT_MIN	(-SHORT_MAX - 1)
+#define USHRT_MAX	((u16)(~0U))
+#define SHRT_MAX	((s16)(USHRT_MAX>>1))
+#define SHRT_MIN	((s16)(-SHRT_MAX - 1))
 #define INT_MAX		((int)(~0U>>1))
 #define INT_MIN		(-INT_MAX - 1)
 #define UINT_MAX	(~0U)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 415b8f8a3f45..c0151ffd3541 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -384,7 +384,7 @@ struct user_namespace;
  * 1-3 now and depends on arch. We use "5" as safe margin, here.
  */
 #define MAPCOUNT_ELF_CORE_MARGIN	(5)
-#define DEFAULT_MAX_MAP_COUNT	(USHORT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+#define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
 
 extern int sysctl_max_map_count;
 
diff --git a/include/net/ip.h b/include/net/ip.h
index 63548f0a44b1..452f229c380a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -358,11 +358,11 @@ enum ip_defrag_users {
 	IP_DEFRAG_LOCAL_DELIVER,
 	IP_DEFRAG_CALL_RA_CHAIN,
 	IP_DEFRAG_CONNTRACK_IN,
-	__IP_DEFRAG_CONNTRACK_IN_END	= IP_DEFRAG_CONNTRACK_IN + USHORT_MAX,
+	__IP_DEFRAG_CONNTRACK_IN_END	= IP_DEFRAG_CONNTRACK_IN + USHRT_MAX,
 	IP_DEFRAG_CONNTRACK_OUT,
-	__IP_DEFRAG_CONNTRACK_OUT_END	= IP_DEFRAG_CONNTRACK_OUT + USHORT_MAX,
+	__IP_DEFRAG_CONNTRACK_OUT_END	= IP_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
 	IP_DEFRAG_CONNTRACK_BRIDGE_IN,
-	__IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX,
+	__IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
 	IP_DEFRAG_VS_IN,
 	IP_DEFRAG_VS_OUT,
 	IP_DEFRAG_VS_FWD
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index eba5cc00325a..2600b69757b8 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -354,11 +354,11 @@ struct inet_frag_queue;
 enum ip6_defrag_users {
 	IP6_DEFRAG_LOCAL_DELIVER,
 	IP6_DEFRAG_CONNTRACK_IN,
-	__IP6_DEFRAG_CONNTRACK_IN	= IP6_DEFRAG_CONNTRACK_IN + USHORT_MAX,
+	__IP6_DEFRAG_CONNTRACK_IN	= IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
 	IP6_DEFRAG_CONNTRACK_OUT,
-	__IP6_DEFRAG_CONNTRACK_OUT	= IP6_DEFRAG_CONNTRACK_OUT + USHORT_MAX,
+	__IP6_DEFRAG_CONNTRACK_OUT	= IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
 	IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
-	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX,
+	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
 };
 
 struct ip6_create_arg {
diff --git a/ipc/msg.c b/ipc/msg.c
index 9547cb7ac313..747b65507a91 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -345,19 +345,19 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 		out.msg_rtime		= in->msg_rtime;
 		out.msg_ctime		= in->msg_ctime;
 
-		if (in->msg_cbytes > USHORT_MAX)
-			out.msg_cbytes	= USHORT_MAX;
+		if (in->msg_cbytes > USHRT_MAX)
+			out.msg_cbytes	= USHRT_MAX;
 		else
 			out.msg_cbytes	= in->msg_cbytes;
 		out.msg_lcbytes		= in->msg_cbytes;
 
-		if (in->msg_qnum > USHORT_MAX)
-			out.msg_qnum	= USHORT_MAX;
+		if (in->msg_qnum > USHRT_MAX)
+			out.msg_qnum	= USHRT_MAX;
 		else
 			out.msg_qnum	= in->msg_qnum;
 
-		if (in->msg_qbytes > USHORT_MAX)
-			out.msg_qbytes	= USHORT_MAX;
+		if (in->msg_qbytes > USHRT_MAX)
+			out.msg_qbytes	= USHRT_MAX;
 		else
 			out.msg_qbytes	= in->msg_qbytes;
 		out.msg_lqbytes		= in->msg_qbytes;
diff --git a/ipc/util.c b/ipc/util.c
index 79ce84e890f7..69a0cc13d966 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -124,8 +124,8 @@ void ipc_init_ids(struct ipc_ids *ids)
 	ids->seq = 0;
 	{
 		int seq_limit = INT_MAX/SEQ_MULTIPLIER;
-		if (seq_limit > USHORT_MAX)
-			ids->seq_max = USHORT_MAX;
+		if (seq_limit > USHRT_MAX)
+			ids->seq_max = USHRT_MAX;
 		 else
 		 	ids->seq_max = seq_limit;
 	}
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 46d34b0b74a8..20c95121d8a1 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1980,7 +1980,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 		{
 			char *s = (char *)va_arg(args, char *);
 			if (field_width == -1)
-				field_width = SHORT_MAX;
+				field_width = SHRT_MAX;
 			/* first, skip leading white space in buffer */
 			str = skip_spaces(str);
 
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 77d3aab4036b..149f82160130 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -394,7 +394,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				const char *sptr = va_arg(ap, const char *);
 				int16_t len = 0;
 				if (sptr)
-					len = MIN(strlen(sptr), USHORT_MAX);
+					len = MIN(strlen(sptr), USHRT_MAX);
 
 				errcode = p9pdu_writef(pdu, proto_version,
 								"w", len);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 1b08cae9c65b..07395f861d35 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -296,7 +296,7 @@ static inline u8 dccp_ndp_len(const u64 ndp)
 {
 	if (likely(ndp <= 0xFF))
 		return 1;
-	return likely(ndp <= USHORT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6);
+	return likely(ndp <= USHRT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6);
 }
 
 int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9de6a698f91d..baeec29fe0f1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1686,8 +1686,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			return -ENOPROTOOPT;
 		if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
 			val = 8;
-		else if (val > USHORT_MAX)
-			val = USHORT_MAX;
+		else if (val > USHRT_MAX)
+			val = USHRT_MAX;
 		up->pcslen = val;
 		up->pcflag |= UDPLITE_SEND_CC;
 		break;
@@ -1700,8 +1700,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			return -ENOPROTOOPT;
 		if (val != 0 && val < 8) /* Avoid silly minimal values.       */
 			val = 8;
-		else if (val > USHORT_MAX)
-			val = USHORT_MAX;
+		else if (val > USHRT_MAX)
+			val = USHRT_MAX;
 		up->pcrlen = val;
 		up->pcflag |= UDPLITE_RECV_CC;
 		break;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 730197591ab5..ba9360a475b0 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -259,7 +259,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	skb_queue_head_init(&sta->tx_filtered);
 
 	for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
-		sta->last_seq_ctrl[i] = cpu_to_le16(USHORT_MAX);
+		sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX);
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	printk(KERN_DEBUG "%s: Allocated STA %pM\n",
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 121105355f60..dac219a56ae1 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -783,7 +783,7 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
 	port = ntohl(*p);
 	dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid,
 			task->tk_msg.rpc_proc->p_name, port);
-	if (unlikely(port > USHORT_MAX))
+	if (unlikely(port > USHRT_MAX))
 		return -EIO;
 
 	rpcb->r_port = port;
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index ef03a82a0135..d37f713e73ce 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -669,7 +669,7 @@ static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
 	struct keyring_list *klist =
 		container_of(rcu, struct keyring_list, rcu);
 
-	if (klist->delkey != USHORT_MAX)
+	if (klist->delkey != USHRT_MAX)
 		key_put(klist->keys[klist->delkey]);
 	kfree(klist);
 }
@@ -746,7 +746,7 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
 			max += klist->maxkeys;
 
 		ret = -ENFILE;
-		if (max > USHORT_MAX - 1)
+		if (max > USHRT_MAX - 1)
 			goto error_quota;
 		size = sizeof(*klist) + sizeof(struct key *) * max;
 		if (size > PAGE_SIZE)
@@ -763,7 +763,7 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
 			       sizeof(struct key *) * klist->nkeys);
 			nklist->delkey = klist->nkeys;
 			nklist->nkeys = klist->nkeys + 1;
-			klist->delkey = USHORT_MAX;
+			klist->delkey = USHRT_MAX;
 		} else {
 			nklist->nkeys = 1;
 			nklist->delkey = 0;
-- 
cgit v1.2.3-55-g7522


From ea16f912a6ad9fe43131a2e2a108f089c432d510 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Tue, 25 May 2010 18:53:57 -0700
Subject: cls_cgroup: Initialise classid when module is absent

When the cls_cgroup module is not loaded, task_cls_classid will
return an uninitialised classid instead of zero.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/cls_cgroup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 6cf44866cecd..726cc3536409 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -39,7 +39,7 @@ extern int net_cls_subsys_id;
 static inline u32 task_cls_classid(struct task_struct *p)
 {
 	int id;
-	u32 classid;
+	u32 classid = 0;
 
 	if (in_interrupt())
 		return 0;
-- 
cgit v1.2.3-55-g7522


From ff937938e7781b2c1bffce0a5400af308e3946d5 Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Wed, 26 May 2010 00:40:11 -0700
Subject: sctp: dubious bitfields in sctp_transport

Sparse complains because these one-bit bitfields are signed.
  include/net/sctp/structs.h:879:24: error: dubious one-bit signed bitfield
  include/net/sctp/structs.h:889:31: error: dubious one-bit signed bitfield
  include/net/sctp/structs.h:895:26: error: dubious one-bit signed bitfield
  include/net/sctp/structs.h:898:31: error: dubious one-bit signed bitfield
  include/net/sctp/structs.h:901:27: error: dubious one-bit signed bitfield

It doesn't cause a problem in the current code, but it would be better
to clean it up.  This was introduced by c0058a35aacc7: "sctp: Save some
room in the sctp_transport by using bitfields".

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 6173c619913a..4b860116e096 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -876,7 +876,7 @@ struct sctp_transport {
 
 	/* Reference counting. */
 	atomic_t refcnt;
-	int	 dead:1,
+	__u32	 dead:1,
 		/* RTO-Pending : A flag used to track if one of the DATA
 		 *		chunks sent to this address is currently being
 		 *		used to compute a RTT. If this flag is 0,
-- 
cgit v1.2.3-55-g7522


From 8a74ad60a546b13bd1096b2a61a7a5c6fd9ae17c Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 26 May 2010 19:20:18 +0000
Subject: net: fix lock_sock_bh/unlock_sock_bh

This new sock lock primitive was introduced to speedup some user context
socket manipulation. But it is unsafe to protect two threads, one using
regular lock_sock/release_sock, one using lock_sock_bh/unlock_sock_bh

This patch changes lock_sock_bh to be careful against 'owned' state.
If owned is found to be set, we must take the slow path.
lock_sock_bh() now returns a boolean to say if the slow path was taken,
and this boolean is used at unlock_sock_bh time to call the appropriate
unlock function.

After this change, BH are either disabled or enabled during the
lock_sock_bh/unlock_sock_bh protected section. This might be misleading,
so we rename these functions to lock_sock_fast()/unlock_sock_fast().

Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h  | 20 ++++++++++++++------
 net/core/datagram.c |  6 ++++--
 net/core/sock.c     | 33 +++++++++++++++++++++++++++++++++
 net/ipv4/udp.c      | 14 ++++++++------
 net/ipv6/udp.c      |  5 +++--
 5 files changed, 62 insertions(+), 16 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index d2a71b04a5ae..ca241ea14875 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1026,15 +1026,23 @@ extern void release_sock(struct sock *sk);
 				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
 
-static inline void lock_sock_bh(struct sock *sk)
+extern bool lock_sock_fast(struct sock *sk);
+/**
+ * unlock_sock_fast - complement of lock_sock_fast
+ * @sk: socket
+ * @slow: slow mode
+ *
+ * fast unlock socket for user context.
+ * If slow mode is on, we call regular release_sock()
+ */
+static inline void unlock_sock_fast(struct sock *sk, bool slow)
 {
-	spin_lock_bh(&sk->sk_lock.slock);
+	if (slow)
+		release_sock(sk);
+	else
+		spin_unlock_bh(&sk->sk_lock.slock);
 }
 
-static inline void unlock_sock_bh(struct sock *sk)
-{
-	spin_unlock_bh(&sk->sk_lock.slock);
-}
 
 extern struct sock		*sk_alloc(struct net *net, int family,
 					  gfp_t priority,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e0097531417a..f5b6f43a4c2e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -229,15 +229,17 @@ EXPORT_SYMBOL(skb_free_datagram);
 
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
 {
+	bool slow;
+
 	if (likely(atomic_read(&skb->users) == 1))
 		smp_rmb();
 	else if (likely(!atomic_dec_and_test(&skb->users)))
 		return;
 
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	skb_orphan(skb);
 	sk_mem_reclaim_partial(sk);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	/* skb is now orphaned, can be freed outside of locked section */
 	__kfree_skb(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 37fe9b6adade..2cf7f9f7e775 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2007,6 +2007,39 @@ void release_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(release_sock);
 
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small section, where process wont block
+ * return false if fast path is taken
+ *   sk_lock.slock locked, owned = 0, BH disabled
+ * return true if slow path is taken
+ *   sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+bool lock_sock_fast(struct sock *sk)
+{
+	might_sleep();
+	spin_lock_bh(&sk->sk_lock.slock);
+
+	if (!sk->sk_lock.owned)
+		/*
+		 * Note : We must disable BH
+		 */
+		return false;
+
+	__lock_sock(sk);
+	sk->sk_lock.owned = 1;
+	spin_unlock(&sk->sk_lock.slock);
+	/*
+	 * The sk_lock has mutex_lock() semantics here:
+	 */
+	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+	local_bh_enable();
+	return true;
+}
+EXPORT_SYMBOL(lock_sock_fast);
+
 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
 	struct timeval tv;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9de6a698f91d..b9d0d409516f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1063,10 +1063,11 @@ static unsigned int first_packet_length(struct sock *sk)
 	spin_unlock_bh(&rcvq->lock);
 
 	if (!skb_queue_empty(&list_kill)) {
-		lock_sock_bh(sk);
+		bool slow = lock_sock_fast(sk);
+
 		__skb_queue_purge(&list_kill);
 		sk_mem_reclaim_partial(sk);
-		unlock_sock_bh(sk);
+		unlock_sock_fast(sk, slow);
 	}
 	return res;
 }
@@ -1123,6 +1124,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int peeked;
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
+	bool slow;
 
 	/*
 	 *	Check any passed addresses
@@ -1197,10 +1199,10 @@ out:
 	return err;
 
 csum_copy_err:
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	if (!skb_kill_datagram(sk, skb, flags))
 		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	if (noblock)
 		return -EAGAIN;
@@ -1625,9 +1627,9 @@ int udp_rcv(struct sk_buff *skb)
 
 void udp_destroy_sock(struct sock *sk)
 {
-	lock_sock_bh(sk);
+	bool slow = lock_sock_fast(sk);
 	udp_flush_pending_frames(sk);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 }
 
 /*
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3d7a2c0b836a..87be58673b55 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -328,6 +328,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 	int is_udp4;
+	bool slow;
 
 	if (addr_len)
 		*addr_len=sizeof(struct sockaddr_in6);
@@ -424,7 +425,7 @@ out:
 	return err;
 
 csum_copy_err:
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	if (!skb_kill_datagram(sk, skb, flags)) {
 		if (is_udp4)
 			UDP_INC_STATS_USER(sock_net(sk),
@@ -433,7 +434,7 @@ csum_copy_err:
 			UDP6_INC_STATS_USER(sock_net(sk),
 					UDP_MIB_INERRORS, is_udplite);
 	}
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	if (flags & MSG_DONTWAIT)
 		return -EAGAIN;
-- 
cgit v1.2.3-55-g7522


From b1faf5666438090a4dc4fceac8502edc7788b7e3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Mon, 31 May 2010 23:44:05 -0700
Subject: net: sock_queue_err_skb() dont mess with sk_forward_alloc

Correct sk_forward_alloc handling for error_queue would need to use a
backlog of frames that softirq handler could not deliver because socket
is owned by user thread. Or extend backlog processing to be able to
process normal and error packets.

Another possibility is to not use mem charge for error queue, this is
what I implemented in this patch.

Note: this reverts commit 29030374
(net: fix sk_forward_alloc corruptions), since we dont need to lock
socket anymore.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 15 +--------------
 net/core/skbuff.c  | 30 ++++++++++++++++++++++++++++--
 net/ipv4/udp.c     |  6 ++----
 net/ipv6/udp.c     |  6 ++----
 4 files changed, 33 insertions(+), 24 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index ca241ea14875..731150d52799 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1524,20 +1524,7 @@ extern void sk_stop_timer(struct sock *sk, struct timer_list* timer);
 
 extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 
-static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
-{
-	/* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
-	   number of warnings when compiling with -W --ANK
-	 */
-	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-	    (unsigned)sk->sk_rcvbuf)
-		return -ENOMEM;
-	skb_set_owner_r(skb, sk);
-	skb_queue_tail(&sk->sk_error_queue, skb);
-	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_data_ready(sk, skb->len);
-	return 0;
-}
+extern int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
 
 /*
  *	Recover an error report and clear atomically
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4e7ac09c281a..9f07e749d7b1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2965,6 +2965,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 }
 EXPORT_SYMBOL_GPL(skb_cow_data);
 
+static void sock_rmem_free(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+}
+
+/*
+ * Note: We dont mem charge error packets (no sk_forward_alloc changes)
+ */
+int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+	    (unsigned)sk->sk_rcvbuf)
+		return -ENOMEM;
+
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = sock_rmem_free;
+	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+
+	skb_queue_tail(&sk->sk_error_queue, skb);
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skb->len);
+	return 0;
+}
+EXPORT_SYMBOL(sock_queue_err_skb);
+
 void skb_tstamp_tx(struct sk_buff *orig_skb,
 		struct skb_shared_hwtstamps *hwtstamps)
 {
@@ -2997,9 +3025,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 	serr->ee.ee_errno = ENOMSG;
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
 
-	bh_lock_sock(sk);
 	err = sock_queue_err_skb(sk, skb);
-	bh_unlock_sock(sk);
 
 	if (err)
 		kfree_skb(skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 50678f9a2763..eec4ff456e33 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -633,11 +633,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 	if (!inet->recverr) {
 		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
 			goto out;
-	} else {
-		bh_lock_sock(sk);
+	} else
 		ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
-		bh_unlock_sock(sk);
-	}
+
 	sk->sk_err = err;
 	sk->sk_error_report(sk);
 out:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3048f906c042..87be58673b55 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -466,11 +466,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (sk->sk_state != TCP_ESTABLISHED && !np->recverr)
 		goto out;
 
-	if (np->recverr) {
-		bh_lock_sock(sk);
+	if (np->recverr)
 		ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
-		bh_unlock_sock(sk);
-	}
+
 	sk->sk_err = err;
 	sk->sk_error_report(sk);
 out:
-- 
cgit v1.2.3-55-g7522


From bc135b23d01acf7ee926aaf75b0020c86d3869f9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 2 Jun 2010 03:23:51 -0700
Subject: net: Define accessors to manipulate QDISC_STATE_RUNNING

Define three helpers to manipulate QDISC_STATE_RUNNIG flag, that a
second patch will move on another location.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h   |  2 +-
 include/net/sch_generic.h | 15 +++++++++++++++
 net/core/dev.c            |  4 ++--
 net/sched/sch_generic.c   |  4 ++--
 4 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 9d4d87cc970e..d9549af6929a 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -95,7 +95,7 @@ extern void __qdisc_run(struct Qdisc *q);
 
 static inline void qdisc_run(struct Qdisc *q)
 {
-	if (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
+	if (qdisc_run_begin(q))
 		__qdisc_run(q);
 }
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 03ca5d826757..9707daed761e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -76,6 +76,21 @@ struct Qdisc {
 	struct rcu_head     rcu_head;
 };
 
+static inline bool qdisc_is_running(struct Qdisc *qdisc)
+{
+	return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+}
+
+static inline bool qdisc_run_begin(struct Qdisc *qdisc)
+{
+	return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+}
+
+static inline void qdisc_run_end(struct Qdisc *qdisc)
+{
+	clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+}
+
 struct Qdisc_class_ops {
 	/* Child qdisc manipulation */
 	struct netdev_queue *	(*select_queue)(struct Qdisc *, struct tcmsg *);
diff --git a/net/core/dev.c b/net/core/dev.c
index 983a3c1d65c4..2733226d90b2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2047,7 +2047,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		kfree_skb(skb);
 		rc = NET_XMIT_DROP;
 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
-		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
+		   qdisc_run_begin(q)) {
 		/*
 		 * This is a work-conserving queue; there are no old skbs
 		 * waiting to be sent out; and the qdisc is not running -
@@ -2059,7 +2059,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
 			__qdisc_run(q);
 		else
-			clear_bit(__QDISC_STATE_RUNNING, &q->state);
+			qdisc_run_end(q);
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index bd1892fe4b21..37b86eab6779 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -205,7 +205,7 @@ void __qdisc_run(struct Qdisc *q)
 		}
 	}
 
-	clear_bit(__QDISC_STATE_RUNNING, &q->state);
+	qdisc_run_end(q);
 }
 
 unsigned long dev_trans_start(struct net_device *dev)
@@ -797,7 +797,7 @@ static bool some_qdisc_is_busy(struct net_device *dev)
 
 		spin_lock_bh(root_lock);
 
-		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
+		val = (qdisc_is_running(q) ||
 		       test_bit(__QDISC_STATE_SCHED, &q->state));
 
 		spin_unlock_bh(root_lock);
-- 
cgit v1.2.3-55-g7522


From 371121057607e3127e19b3fa094330181b5b031e Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 2 Jun 2010 03:24:13 -0700
Subject: net: QDISC_STATE_RUNNING dont need atomic bit ops

__QDISC_STATE_RUNNING is always changed while qdisc lock is held.

We can avoid two atomic operations in xmit path, if we move this bit in
a new __state container.

Location of this __state container is carefully chosen so that fast path
only dirties one qdisc cache line.

THROTTLED bit could later be moved into this __state location too, to
avoid dirtying first qdisc cache line.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 9707daed761e..b3591e4a514c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -23,11 +23,17 @@ struct qdisc_rate_table {
 };
 
 enum qdisc_state_t {
-	__QDISC_STATE_RUNNING,
 	__QDISC_STATE_SCHED,
 	__QDISC_STATE_DEACTIVATED,
 };
 
+/*
+ * following bits are only changed while qdisc lock is held
+ */
+enum qdisc___state_t {
+	__QDISC___STATE_RUNNING,
+};
+
 struct qdisc_size_table {
 	struct list_head	list;
 	struct tc_sizespec	szopts;
@@ -72,23 +78,24 @@ struct Qdisc {
 	unsigned long		state;
 	struct sk_buff_head	q;
 	struct gnet_stats_basic_packed bstats;
+	unsigned long		__state;
 	struct gnet_stats_queue	qstats;
 	struct rcu_head     rcu_head;
 };
 
 static inline bool qdisc_is_running(struct Qdisc *qdisc)
 {
-	return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+	return test_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
 }
 
 static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
-	return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+	return !__test_and_set_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
 }
 
 static inline void qdisc_run_end(struct Qdisc *qdisc)
 {
-	clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+	__clear_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
 }
 
 struct Qdisc_class_ops {
-- 
cgit v1.2.3-55-g7522


From 79640a4ca6955e3ebdb7038508fa7a0cd7fa5527 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 2 Jun 2010 05:09:29 -0700
Subject: net: add additional lock to qdisc to increase throughput

When many cpus compete for sending frames on a given qdisc, the qdisc
spinlock suffers from very high contention.

The cpu owning __QDISC_STATE_RUNNING bit has same priority to acquire
the lock, and cannot dequeue packets fast enough, since it must wait for
this lock for each dequeued packet.

One solution to this problem is to force all cpus spinning on a second
lock before trying to get the main lock, when/if they see
__QDISC_STATE_RUNNING already set.

The owning cpu then compete with at most one other cpu for the main
lock, allowing for higher dequeueing rate.

Based on a previous patch from Alexander Duyck. I added the heuristic to
avoid the atomic in fast path, and put the new lock far away from the
cache line used by the dequeue worker. Also try to release the busylock
lock as late as possible.

Tests with following script gave a boost from ~50.000 pps to ~600.000
pps on a dual quad core machine (E5450 @3.00GHz), tg3 driver.
(A single netperf flow can reach ~800.000 pps on this platform)

for j in `seq 0 3`; do
  for i in `seq 0 7`; do
    netperf -H 192.168.0.1 -t UDP_STREAM -l 60 -N -T $i -- -m 6 &
  done
done

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  3 ++-
 net/core/dev.c            | 29 +++++++++++++++++++++++++----
 net/sched/sch_generic.c   |  1 +
 3 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b3591e4a514c..b35301b0c7b6 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -80,7 +80,8 @@ struct Qdisc {
 	struct gnet_stats_basic_packed bstats;
 	unsigned long		__state;
 	struct gnet_stats_queue	qstats;
-	struct rcu_head     rcu_head;
+	struct rcu_head		rcu_head;
+	spinlock_t		busylock;
 };
 
 static inline bool qdisc_is_running(struct Qdisc *qdisc)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2733226d90b2..ffca5c1066fa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2040,8 +2040,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 				 struct netdev_queue *txq)
 {
 	spinlock_t *root_lock = qdisc_lock(q);
+	bool contended = qdisc_is_running(q);
 	int rc;
 
+	/*
+	 * Heuristic to force contended enqueues to serialize on a
+	 * separate lock before trying to get qdisc main lock.
+	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
+	 * and dequeue packets faster.
+	 */
+	if (unlikely(contended))
+		spin_lock(&q->busylock);
+
 	spin_lock(root_lock);
 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
 		kfree_skb(skb);
@@ -2056,19 +2066,30 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
 			skb_dst_force(skb);
 		__qdisc_update_bstats(q, skb->len);
-		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
+		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
 			__qdisc_run(q);
-		else
+		} else
 			qdisc_run_end(q);
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
 		skb_dst_force(skb);
 		rc = qdisc_enqueue_root(skb, q);
-		qdisc_run(q);
+		if (qdisc_run_begin(q)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
+			__qdisc_run(q);
+		}
 	}
 	spin_unlock(root_lock);
-
+	if (unlikely(contended))
+		spin_unlock(&q->busylock);
 	return rc;
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 37b86eab6779..d20fcd2a5519 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -561,6 +561,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 
 	INIT_LIST_HEAD(&sch->list);
 	skb_queue_head_init(&sch->q);
+	spin_lock_init(&sch->busylock);
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
-- 
cgit v1.2.3-55-g7522


From c2d9ba9bce8d7323ca96f239e1f505c14d6244fb Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 1 Jun 2010 06:51:19 +0000
Subject: net: CONFIG_NET_NS reduction

Use read_pnet() and write_pnet() to reduce number of ifdef CONFIG_NET_NS

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h            |  6 +-----
 include/net/cfg80211.h               | 15 ++-------------
 include/net/genetlink.h              | 15 ++-------------
 include/net/netfilter/nf_conntrack.h |  6 +-----
 include/net/sock.h                   | 10 ++--------
 net/ipv6/addrlabel.c                 |  6 +-----
 net/netfilter/nf_conntrack_core.c    |  8 ++------
 7 files changed, 11 insertions(+), 55 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a24916156f4e..bd6b75317d5f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1087,11 +1087,7 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 static inline
 struct net *dev_net(const struct net_device *dev)
 {
-#ifdef CONFIG_NET_NS
-	return dev->nd_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&dev->nd_net);
 }
 
 static inline
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index b44a2e5321a3..e7ebeb8bdf71 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1330,26 +1330,15 @@ struct wiphy {
 	char priv[0] __attribute__((__aligned__(NETDEV_ALIGN)));
 };
 
-#ifdef CONFIG_NET_NS
-static inline struct net *wiphy_net(struct wiphy *wiphy)
-{
-	return wiphy->_net;
-}
-
-static inline void wiphy_net_set(struct wiphy *wiphy, struct net *net)
-{
-	wiphy->_net = net;
-}
-#else
 static inline struct net *wiphy_net(struct wiphy *wiphy)
 {
-	return &init_net;
+	return read_pnet(&wiphy->_net);
 }
 
 static inline void wiphy_net_set(struct wiphy *wiphy, struct net *net)
 {
+	write_pnet(&wiphy->_net, net);
 }
-#endif
 
 /**
  * wiphy_priv - return priv from wiphy
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index eb551baafc04..f7dcd2c70412 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -68,26 +68,15 @@ struct genl_info {
 #endif
 };
 
-#ifdef CONFIG_NET_NS
 static inline struct net *genl_info_net(struct genl_info *info)
 {
-	return info->_net;
+	return read_pnet(&info->_net);
 }
 
 static inline void genl_info_net_set(struct genl_info *info, struct net *net)
 {
-	info->_net = net;
+	write_pnet(&info->_net, net);
 }
-#else
-static inline struct net *genl_info_net(struct genl_info *info)
-{
-	return &init_net;
-}
-
-static inline void genl_info_net_set(struct genl_info *info, struct net *net)
-{
-}
-#endif
 
 /**
  * struct genl_ops - generic netlink operations
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index bde095f7e845..bbfdd9453087 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -152,11 +152,7 @@ extern struct net init_net;
 
 static inline struct net *nf_ct_net(const struct nf_conn *ct)
 {
-#ifdef CONFIG_NET_NS
-	return ct->ct_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&ct->ct_net);
 }
 
 /* Alter reply tuple (maybe alter helper). */
diff --git a/include/net/sock.h b/include/net/sock.h
index ca241ea14875..3461e5d1e9ad 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1724,19 +1724,13 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e
 static inline
 struct net *sock_net(const struct sock *sk)
 {
-#ifdef CONFIG_NET_NS
-	return sk->sk_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&sk->sk_net);
 }
 
 static inline
 void sock_net_set(struct sock *sk, struct net *net)
 {
-#ifdef CONFIG_NET_NS
-	sk->sk_net = net;
-#endif
+	write_pnet(&sk->sk_net, net);
 }
 
 /*
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 8c4348cb1950..f0e774cea386 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -53,11 +53,7 @@ static struct ip6addrlbl_table
 static inline
 struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
 {
-#ifdef CONFIG_NET_NS
-	return lbl->lbl_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&lbl->lbl_net);
 }
 
 /*
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index eeeb8bc73982..77288980fae0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -619,9 +619,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 	ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
 	/* Don't set timer yet: wait for confirmation */
 	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
-#ifdef CONFIG_NET_NS
-	ct->ct_net = net;
-#endif
+	write_pnet(&ct->ct_net, net);
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	if (zone) {
 		struct nf_conntrack_zone *nf_ct_zone;
@@ -1363,9 +1361,7 @@ static int nf_conntrack_init_init_net(void)
 		goto err_extend;
 #endif
 	/* Set up fake conntrack: to never be deleted, not in any hashes */
-#ifdef CONFIG_NET_NS
-	nf_conntrack_untracked.ct_net = &init_net;
-#endif
+	write_pnet(&nf_conntrack_untracked.ct_net, &init_net);
 	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
 	/*  - and look it like as a confirmed connection */
 	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
-- 
cgit v1.2.3-55-g7522


From 20c59de2e6b6bc74bbf714dcd4e720afe8d516cf Mon Sep 17 00:00:00 2001
From: Arnaud Ebalard
Date: Tue, 1 Jun 2010 21:35:01 +0000
Subject: ipv6: Refactor update of IPv6 flowi destination address for srcrt
 (RH) option

There are more than a dozen occurrences of following code in the
IPv6 stack:

    if (opt && opt->srcrt) {
            struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
            ipv6_addr_copy(&final, &fl.fl6_dst);
            ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
            final_p = &final;
    }

Replace those with a helper. Note that the helper overrides final_p
in all cases. This is ok as final_p was previously initialized to
NULL when declared.

Signed-off-by: Arnaud Ebalard <arno@natisbad.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h               |  4 ++++
 net/dccp/ipv6.c                  | 30 ++++++------------------------
 net/ipv6/af_inet6.c              |  9 ++-------
 net/ipv6/datagram.c              | 18 ++++--------------
 net/ipv6/exthdrs.c               | 24 ++++++++++++++++++++++++
 net/ipv6/inet6_connection_sock.c |  9 ++-------
 net/ipv6/raw.c                   | 10 ++--------
 net/ipv6/syncookies.c            |  9 ++-------
 net/ipv6/tcp_ipv6.c              | 27 ++++++---------------------
 net/ipv6/udp.c                   | 11 +++--------
 10 files changed, 55 insertions(+), 96 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 2600b69757b8..f5808d596aab 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -551,6 +551,10 @@ extern int 			ipv6_ext_hdr(u8 nexthdr);
 
 extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type);
 
+extern struct in6_addr *fl6_update_dst(struct flowi *fl,
+				       const struct ipv6_txoptions *opt,
+				       struct in6_addr *orig);
+
 /*
  *	socket options (ipv6_sockglue.c)
  */
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 091698899594..6e3f32575df7 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -248,7 +248,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff *skb;
 	struct ipv6_txoptions *opt = NULL;
-	struct in6_addr *final_p = NULL, final;
+	struct in6_addr *final_p, final;
 	struct flowi fl;
 	int err = -1;
 	struct dst_entry *dst;
@@ -265,13 +265,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 
 	opt = np->opt;
 
-	if (opt != NULL && opt->srcrt != NULL) {
-		const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
-
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, opt, &final);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
@@ -545,19 +539,13 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		goto out_overflow;
 
 	if (dst == NULL) {
-		struct in6_addr *final_p = NULL, final;
+		struct in6_addr *final_p, final;
 		struct flowi fl;
 
 		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_DCCP;
 		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
-		if (opt != NULL && opt->srcrt != NULL) {
-			const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
-
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+		final_p = fl6_update_dst(&fl, opt, &final);
 		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
@@ -885,7 +873,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct in6_addr *saddr = NULL, *final_p = NULL, final;
+	struct in6_addr *saddr = NULL, *final_p, final;
 	struct flowi fl;
 	struct dst_entry *dst;
 	int addr_type;
@@ -988,13 +976,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl.fl_ip_sport = inet->inet_sport;
 	security_sk_classify_flow(sk, &fl);
 
-	if (np->opt != NULL && np->opt->srcrt != NULL) {
-		const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, np->opt, &final);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e733942dafe1..94b1b9c954bf 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -651,7 +651,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 
 	if (dst == NULL) {
 		struct inet_sock *inet = inet_sk(sk);
-		struct in6_addr *final_p = NULL, final;
+		struct in6_addr *final_p, final;
 		struct flowi fl;
 
 		memset(&fl, 0, sizeof(fl));
@@ -665,12 +665,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 		fl.fl_ip_sport = inet->inet_sport;
 		security_sk_classify_flow(sk, &fl);
 
-		if (np->opt && np->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+		final_p = fl6_update_dst(&fl, np->opt, &final);
 
 		err = ip6_dst_lookup(sk, &dst, &fl);
 		if (err) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 712684687c9a..7d929a22cbc2 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -38,10 +38,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct sockaddr_in6	*usin = (struct sockaddr_in6 *) uaddr;
 	struct inet_sock      	*inet = inet_sk(sk);
 	struct ipv6_pinfo      	*np = inet6_sk(sk);
-	struct in6_addr		*daddr, *final_p = NULL, final;
+	struct in6_addr		*daddr, *final_p, final;
 	struct dst_entry	*dst;
 	struct flowi		fl;
 	struct ip6_flowlabel	*flowlabel = NULL;
+	struct ipv6_txoptions   *opt;
 	int			addr_type;
 	int			err;
 
@@ -155,19 +156,8 @@ ipv4_connected:
 
 	security_sk_classify_flow(sk, &fl);
 
-	if (flowlabel) {
-		if (flowlabel->opt && flowlabel->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
-	} else if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	opt = flowlabel ? flowlabel->opt : np->opt;
+	final_p = fl6_update_dst(&fl, opt, &final);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 8a659f92d17a..853a633a94d4 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -874,3 +874,27 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
 	return opt;
 }
 
+/**
+ * fl6_update_dst - update flowi destination address with info given
+ *                  by srcrt option, if any.
+ *
+ * @fl: flowi for which fl6_dst is to be updated
+ * @opt: struct ipv6_txoptions in which to look for srcrt opt
+ * @orig: copy of original fl6_dst address if modified
+ *
+ * Returns NULL if no txoptions or no srcrt, otherwise returns orig
+ * and initial value of fl->fl6_dst set in orig
+ */
+struct in6_addr *fl6_update_dst(struct flowi *fl,
+				const struct ipv6_txoptions *opt,
+				struct in6_addr *orig)
+{
+	if (!opt || !opt->srcrt)
+		return NULL;
+
+	ipv6_addr_copy(orig, &fl->fl6_dst);
+	ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr);
+	return orig;
+}
+
+EXPORT_SYMBOL_GPL(fl6_update_dst);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 0c5e3c3b7fd5..8a1628023bd1 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -185,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct flowi fl;
 	struct dst_entry *dst;
-	struct in6_addr *final_p = NULL, final;
+	struct in6_addr *final_p, final;
 
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = sk->sk_protocol;
@@ -199,12 +199,7 @@ int inet6_csk_xmit(struct sk_buff *skb)
 	fl.fl_ip_dport = inet->inet_dport;
 	security_sk_classify_flow(sk, &fl);
 
-	if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, np->opt, &final);
 
 	dst = __inet6_csk_dst_check(sk, np->dst_cookie);
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4a4dcbe4f8b2..864eb8e03b1b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -725,7 +725,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 {
 	struct ipv6_txoptions opt_space;
 	struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
-	struct in6_addr *daddr, *final_p = NULL, final;
+	struct in6_addr *daddr, *final_p, final;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct raw6_sock *rp = raw6_sk(sk);
@@ -847,13 +847,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 
-	/* merge ip6_build_xmit from ip6_output */
-	if (opt && opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, opt, &final);
 
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
 		fl.oif = np->mcast_oif;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 34d1f0690d7e..12383705dbad 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -240,17 +240,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	 * me if there is a preferred way.
 	 */
 	{
-		struct in6_addr *final_p = NULL, final;
+		struct in6_addr *final_p, final;
 		struct flowi fl;
 		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_TCP;
 		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
-		if (np->opt && np->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+		final_p = fl6_update_dst(&fl, np->opt, &final);
 		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.mark = sk->sk_mark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2b7c3a100e2c..e487080d02db 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -129,7 +129,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct in6_addr *saddr = NULL, *final_p = NULL, final;
+	struct in6_addr *saddr = NULL, *final_p, final;
 	struct flowi fl;
 	struct dst_entry *dst;
 	int addr_type;
@@ -250,12 +250,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl.fl_ip_dport = usin->sin6_port;
 	fl.fl_ip_sport = inet->inet_sport;
 
-	if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, np->opt, &final);
 
 	security_sk_classify_flow(sk, &fl);
 
@@ -477,7 +472,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff * skb;
 	struct ipv6_txoptions *opt = NULL;
-	struct in6_addr * final_p = NULL, final;
+	struct in6_addr * final_p, final;
 	struct flowi fl;
 	struct dst_entry *dst;
 	int err = -1;
@@ -494,12 +489,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	security_req_classify_flow(req, &fl);
 
 	opt = np->opt;
-	if (opt && opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, opt, &final);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
@@ -1392,18 +1382,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		goto out_overflow;
 
 	if (dst == NULL) {
-		struct in6_addr *final_p = NULL, final;
+		struct in6_addr *final_p, final;
 		struct flowi fl;
 
 		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_TCP;
 		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
-		if (opt && opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+		final_p = fl6_update_dst(&fl, opt, &final);
 		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.mark = sk->sk_mark;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3048f906c042..4aea57dec75f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -929,7 +929,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
-	struct in6_addr *daddr, *final_p = NULL, final;
+	struct in6_addr *daddr, *final_p, final;
 	struct ipv6_txoptions *opt = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct flowi fl;
@@ -1099,14 +1099,9 @@ do_udp_sendmsg:
 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 	fl.fl_ip_sport = inet->inet_sport;
 
-	/* merge ip6_build_xmit from ip6_output */
-	if (opt && opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
+	final_p = fl6_update_dst(&fl, opt, &final);
+	if (final_p)
 		connected = 0;
-	}
 
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) {
 		fl.oif = np->mcast_oif;
-- 
cgit v1.2.3-55-g7522


From bc10502dba37d3b210efd9f3867212298f13b78e Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 3 Jun 2010 03:21:52 -0700
Subject: net: use __packed annotation

cleanup patch.

Use new __packed annotation in net/ and include/
(except netfilter)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h   |  2 +-
 include/linux/if_fddi.h    |  8 ++---
 include/linux/if_frad.h    |  2 +-
 include/linux/if_hippi.h   |  8 ++---
 include/linux/if_pppox.h   |  8 ++---
 include/linux/ipv6.h       |  4 +--
 include/linux/isdnif.h     |  2 +-
 include/linux/mISDNif.h    |  2 +-
 include/linux/nbd.h        |  2 +-
 include/linux/ncp.h        | 10 +++---
 include/linux/ncp_fs_sb.h  | 14 ++++----
 include/linux/phonet.h     |  4 +--
 include/linux/rds.h        | 12 +++----
 include/linux/sctp.h       | 80 +++++++++++++++++++++++-----------------------
 include/linux/wlp.h        | 22 ++++++-------
 include/net/dn_dev.h       |  8 ++---
 include/net/dn_nsp.h       | 16 +++++-----
 include/net/ip6_tunnel.h   |  2 +-
 include/net/ipx.h          |  8 ++---
 include/net/mip6.h         |  2 +-
 include/net/ndisc.h        |  2 +-
 include/net/sctp/structs.h |  4 +--
 include/rxrpc/packet.h     |  8 ++---
 net/bluetooth/bnep/bnep.h  |  8 ++---
 net/compat.c               |  6 ++--
 net/iucv/iucv.c            | 14 ++++----
 net/mac80211/cfg.c         |  2 +-
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/rx.c          |  2 +-
 net/sctp/sm_make_chunk.c   |  2 +-
 30 files changed, 133 insertions(+), 133 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index bed7a4682b90..c831467774d0 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -119,7 +119,7 @@ struct ethhdr {
 	unsigned char	h_dest[ETH_ALEN];	/* destination eth addr	*/
 	unsigned char	h_source[ETH_ALEN];	/* source ether addr	*/
 	__be16		h_proto;		/* packet type ID field	*/
-} __attribute__((packed));
+} __packed;
 
 #ifdef __KERNEL__
 #include <linux/skbuff.h>
diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h
index 5459c5c09930..9947c39e62f6 100644
--- a/include/linux/if_fddi.h
+++ b/include/linux/if_fddi.h
@@ -67,7 +67,7 @@ struct fddi_8022_1_hdr {
 	__u8	dsap;					/* destination service access point */
 	__u8	ssap;					/* source service access point */
 	__u8	ctrl;					/* control byte #1 */
-} __attribute__ ((packed));
+} __packed;
 
 /* Define 802.2 Type 2 header */
 struct fddi_8022_2_hdr {
@@ -75,7 +75,7 @@ struct fddi_8022_2_hdr {
 	__u8	ssap;					/* source service access point */
 	__u8	ctrl_1;					/* control byte #1 */
 	__u8	ctrl_2;					/* control byte #2 */
-} __attribute__ ((packed));
+} __packed;
 
 /* Define 802.2 SNAP header */
 #define FDDI_K_OUI_LEN	3
@@ -85,7 +85,7 @@ struct fddi_snap_hdr {
 	__u8	ctrl;					/* always 0x03 */
 	__u8	oui[FDDI_K_OUI_LEN];	/* organizational universal id */
 	__be16	ethertype;				/* packet type ID field */
-} __attribute__ ((packed));
+} __packed;
 
 /* Define FDDI LLC frame header */
 struct fddihdr {
@@ -98,7 +98,7 @@ struct fddihdr {
 		struct fddi_8022_2_hdr		llc_8022_2;
 		struct fddi_snap_hdr		llc_snap;
 		} hdr;
-} __attribute__ ((packed));
+} __packed;
 
 #ifdef __KERNEL__
 #include <linux/netdevice.h>
diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h
index 80b3a1056a5f..191ee0869bc1 100644
--- a/include/linux/if_frad.h
+++ b/include/linux/if_frad.h
@@ -135,7 +135,7 @@ struct frhdr
    __be16 PID;
 
 #define IP_NLPID pad 
-} __attribute__((packed));
+} __packed;
 
 /* see RFC 1490 for the definition of the following */
 #define FRAD_I_UI		0x03
diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h
index 8d038eb8db5c..5fe5f307c6f5 100644
--- a/include/linux/if_hippi.h
+++ b/include/linux/if_hippi.h
@@ -104,7 +104,7 @@ struct hippi_fp_hdr {
 	__be32		fixed;
 #endif
 	__be32		d2_size;
-} __attribute__ ((packed));
+} __packed;
 
 struct hippi_le_hdr {
 #if defined (__BIG_ENDIAN_BITFIELD)
@@ -129,7 +129,7 @@ struct hippi_le_hdr {
 	__u8		daddr[HIPPI_ALEN];
 	__u16		locally_administered;
 	__u8		saddr[HIPPI_ALEN];
-} __attribute__ ((packed));
+} __packed;
 
 #define HIPPI_OUI_LEN	3
 /*
@@ -142,12 +142,12 @@ struct hippi_snap_hdr {
 	__u8	ctrl;			/* always 0x03 */
 	__u8	oui[HIPPI_OUI_LEN];	/* organizational universal id (zero)*/
 	__be16	ethertype;		/* packet type ID field */
-} __attribute__ ((packed));
+} __packed;
 
 struct hippi_hdr {
 	struct hippi_fp_hdr	fp;
 	struct hippi_le_hdr	le;
 	struct hippi_snap_hdr	snap;
-} __attribute__ ((packed));
+} __packed;
 
 #endif	/* _LINUX_IF_HIPPI_H */
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index a6577af0c4e6..1925e0c3f162 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -59,7 +59,7 @@ struct sockaddr_pppox {
        union{ 
                struct pppoe_addr       pppoe; 
        }sa_addr; 
-}__attribute__ ((packed)); 
+} __packed;
 
 /* The use of the above union isn't viable because the size of this
  * struct must stay fixed over time -- applications use sizeof(struct
@@ -70,7 +70,7 @@ struct sockaddr_pppol2tp {
 	sa_family_t     sa_family;      /* address family, AF_PPPOX */
 	unsigned int    sa_protocol;    /* protocol identifier */
 	struct pppol2tp_addr pppol2tp;
-}__attribute__ ((packed));
+} __packed;
 
 /* The L2TPv3 protocol changes tunnel and session ids from 16 to 32
  * bits. So we need a different sockaddr structure.
@@ -79,7 +79,7 @@ struct sockaddr_pppol2tpv3 {
 	sa_family_t     sa_family;      /* address family, AF_PPPOX */
 	unsigned int    sa_protocol;    /* protocol identifier */
 	struct pppol2tpv3_addr pppol2tp;
-} __attribute__ ((packed));
+} __packed;
 
 /*********************************************************************
  *
@@ -129,7 +129,7 @@ struct pppoe_hdr {
 	__be16 sid;
 	__be16 length;
 	struct pppoe_tag tag[0];
-} __attribute__ ((packed));
+} __packed;
 
 /* Length of entire PPPoE + PPP header */
 #define PPPOE_SES_HLEN	8
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 99e1ab7e3eec..940e21595351 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -58,7 +58,7 @@ struct ipv6_opt_hdr {
 	/* 
 	 * TLV encoded option data follows.
 	 */
-} __attribute__ ((packed));	/* required for some archs */
+} __packed;	/* required for some archs */
 
 #define ipv6_destopt_hdr ipv6_opt_hdr
 #define ipv6_hopopt_hdr  ipv6_opt_hdr
@@ -99,7 +99,7 @@ struct ipv6_destopt_hao {
 	__u8			type;
 	__u8			length;
 	struct in6_addr		addr;
-} __attribute__ ((__packed__));
+} __packed;
 
 /*
  *	IPv6 fixed header
diff --git a/include/linux/isdnif.h b/include/linux/isdnif.h
index b9b5a684ed69..b8c23f88dd54 100644
--- a/include/linux/isdnif.h
+++ b/include/linux/isdnif.h
@@ -317,7 +317,7 @@ typedef struct T30_s {
 	__u8 r_scantime;
 	__u8 r_id[FAXIDLEN];
 	__u8 r_code;
-} __attribute__((packed)) T30_s;
+} __packed T30_s;
 
 #define ISDN_TTY_FAX_CONN_IN	0
 #define ISDN_TTY_FAX_CONN_OUT	1
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 78c3bed1c3f5..b5e7f2202484 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -251,7 +251,7 @@
 struct mISDNhead {
 	unsigned int	prim;
 	unsigned int	id;
-}  __attribute__((packed));
+}  __packed;
 
 #define MISDN_HEADER_LEN	sizeof(struct mISDNhead)
 #define MAX_DATA_SIZE		2048
diff --git a/include/linux/nbd.h b/include/linux/nbd.h
index 155719dab813..bb58854a8061 100644
--- a/include/linux/nbd.h
+++ b/include/linux/nbd.h
@@ -88,7 +88,7 @@ struct nbd_request {
 	char handle[8];
 	__be64 from;
 	__be32 len;
-} __attribute__ ((packed));
+} __packed;
 
 /*
  * This is the reply packet that nbd-server sends back to the client after
diff --git a/include/linux/ncp.h b/include/linux/ncp.h
index 99f0adeeb3f3..3ace8370e61e 100644
--- a/include/linux/ncp.h
+++ b/include/linux/ncp.h
@@ -27,7 +27,7 @@ struct ncp_request_header {
 	__u8 conn_high;
 	__u8 function;
 	__u8 data[0];
-} __attribute__((packed));
+} __packed;
 
 #define NCP_REPLY                (0x3333)
 #define NCP_WATCHDOG		 (0x3E3E)
@@ -42,7 +42,7 @@ struct ncp_reply_header {
 	__u8 completion_code;
 	__u8 connection_state;
 	__u8 data[0];
-} __attribute__((packed));
+} __packed;
 
 #define NCP_VOLNAME_LEN (16)
 #define NCP_NUMBER_OF_VOLUMES (256)
@@ -158,7 +158,7 @@ struct nw_info_struct {
 #ifdef __KERNEL__
 	struct nw_nfs_info nfs;
 #endif
-} __attribute__((packed));
+} __packed;
 
 /* modify mask - use with MODIFY_DOS_INFO structure */
 #define DM_ATTRIBUTES		  (cpu_to_le32(0x02))
@@ -190,12 +190,12 @@ struct nw_modify_dos_info {
 	__u16 inheritanceGrantMask;
 	__u16 inheritanceRevokeMask;
 	__u32 maximumSpace;
-} __attribute__((packed));
+} __packed;
 
 struct nw_search_sequence {
 	__u8 volNumber;
 	__u32 dirBase;
 	__u32 sequence;
-} __attribute__((packed));
+} __packed;
 
 #endif				/* _LINUX_NCP_H */
diff --git a/include/linux/ncp_fs_sb.h b/include/linux/ncp_fs_sb.h
index 5ec9ca671687..8da05bc098ca 100644
--- a/include/linux/ncp_fs_sb.h
+++ b/include/linux/ncp_fs_sb.h
@@ -104,13 +104,13 @@ struct ncp_server {
 
 		unsigned int state;		/* STREAM only: receiver state */
 		struct {
-			__u32 magic __attribute__((packed));
-			__u32 len __attribute__((packed));
-			__u16 type __attribute__((packed));
-			__u16 p1 __attribute__((packed));
-			__u16 p2 __attribute__((packed));
-			__u16 p3 __attribute__((packed));
-			__u16 type2 __attribute__((packed));
+			__u32 magic __packed;
+			__u32 len __packed;
+			__u16 type __packed;
+			__u16 p1 __packed;
+			__u16 p2 __packed;
+			__u16 p3 __packed;
+			__u16 type2 __packed;
 		} buf;				/* STREAM only: temporary buffer */
 		unsigned char* ptr;		/* STREAM only: pointer to data */
 		size_t len;			/* STREAM only: length of data to receive */
diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index e5126cff9b2a..24426c3d6b5a 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -56,7 +56,7 @@ struct phonethdr {
 	__be16	pn_length;
 	__u8	pn_robj;
 	__u8	pn_sobj;
-} __attribute__((packed));
+} __packed;
 
 /* Common Phonet payload header */
 struct phonetmsg {
@@ -98,7 +98,7 @@ struct sockaddr_pn {
 	__u8 spn_dev;
 	__u8 spn_resource;
 	__u8 spn_zero[sizeof(struct sockaddr) - sizeof(sa_family_t) - 3];
-} __attribute__ ((packed));
+} __packed;
 
 /* Well known address */
 #define PN_DEV_PC	0x10
diff --git a/include/linux/rds.h b/include/linux/rds.h
index cab4994c2f63..24bce3ded9ea 100644
--- a/include/linux/rds.h
+++ b/include/linux/rds.h
@@ -100,7 +100,7 @@
 struct rds_info_counter {
 	u_int8_t	name[32];
 	u_int64_t	value;
-} __attribute__((packed));
+} __packed;
 
 #define RDS_INFO_CONNECTION_FLAG_SENDING	0x01
 #define RDS_INFO_CONNECTION_FLAG_CONNECTING	0x02
@@ -115,7 +115,7 @@ struct rds_info_connection {
 	__be32		faddr;
 	u_int8_t	transport[TRANSNAMSIZ];		/* null term ascii */
 	u_int8_t	flags;
-} __attribute__((packed));
+} __packed;
 
 struct rds_info_flow {
 	__be32		laddr;
@@ -123,7 +123,7 @@ struct rds_info_flow {
 	u_int32_t	bytes;
 	__be16		lport;
 	__be16		fport;
-} __attribute__((packed));
+} __packed;
 
 #define RDS_INFO_MESSAGE_FLAG_ACK               0x01
 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK          0x02
@@ -136,7 +136,7 @@ struct rds_info_message {
 	__be16		lport;
 	__be16		fport;
 	u_int8_t	flags;
-} __attribute__((packed));
+} __packed;
 
 struct rds_info_socket {
 	u_int32_t	sndbuf;
@@ -146,7 +146,7 @@ struct rds_info_socket {
 	__be16		connected_port;
 	u_int32_t	rcvbuf;
 	u_int64_t	inum;
-} __attribute__((packed));
+} __packed;
 
 struct rds_info_tcp_socket {
 	__be32          local_addr;
@@ -158,7 +158,7 @@ struct rds_info_tcp_socket {
 	u_int32_t       last_sent_nxt;
 	u_int32_t       last_expected_una;
 	u_int32_t       last_seen_una;
-} __attribute__((packed));
+} __packed;
 
 #define RDS_IB_GID_LEN	16
 struct rds_info_rdma_connection {
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index c20d3ce673c0..c11a28706fa4 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -61,7 +61,7 @@ typedef struct sctphdr {
 	__be16 dest;
 	__be32 vtag;
 	__le32 checksum;
-} __attribute__((packed)) sctp_sctphdr_t;
+} __packed sctp_sctphdr_t;
 
 #ifdef __KERNEL__
 #include <linux/skbuff.h>
@@ -77,7 +77,7 @@ typedef struct sctp_chunkhdr {
 	__u8 type;
 	__u8 flags;
 	__be16 length;
-} __attribute__((packed)) sctp_chunkhdr_t;
+} __packed sctp_chunkhdr_t;
 
 
 /* Section 3.2.  Chunk Type Values.
@@ -167,7 +167,7 @@ enum { SCTP_CHUNK_FLAG_T = 0x01 };
 typedef struct sctp_paramhdr {
 	__be16 type;
 	__be16 length;
-} __attribute__((packed)) sctp_paramhdr_t;
+} __packed sctp_paramhdr_t;
 
 typedef enum {
 
@@ -228,12 +228,12 @@ typedef struct sctp_datahdr {
 	__be16 ssn;
 	__be32 ppid;
 	__u8  payload[0];
-} __attribute__((packed)) sctp_datahdr_t;
+} __packed sctp_datahdr_t;
 
 typedef struct sctp_data_chunk {
         sctp_chunkhdr_t chunk_hdr;
         sctp_datahdr_t  data_hdr;
-} __attribute__((packed)) sctp_data_chunk_t;
+} __packed sctp_data_chunk_t;
 
 /* DATA Chuck Specific Flags */
 enum {
@@ -259,78 +259,78 @@ typedef struct sctp_inithdr {
 	__be16 num_inbound_streams;
 	__be32 initial_tsn;
 	__u8  params[0];
-} __attribute__((packed)) sctp_inithdr_t;
+} __packed sctp_inithdr_t;
 
 typedef struct sctp_init_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_inithdr_t init_hdr;
-} __attribute__((packed)) sctp_init_chunk_t;
+} __packed sctp_init_chunk_t;
 
 
 /* Section 3.3.2.1. IPv4 Address Parameter (5) */
 typedef struct sctp_ipv4addr_param {
 	sctp_paramhdr_t param_hdr;
 	struct in_addr  addr;
-} __attribute__((packed)) sctp_ipv4addr_param_t;
+} __packed sctp_ipv4addr_param_t;
 
 /* Section 3.3.2.1. IPv6 Address Parameter (6) */
 typedef struct sctp_ipv6addr_param {
 	sctp_paramhdr_t param_hdr;
 	struct in6_addr addr;
-} __attribute__((packed)) sctp_ipv6addr_param_t;
+} __packed sctp_ipv6addr_param_t;
 
 /* Section 3.3.2.1 Cookie Preservative (9) */
 typedef struct sctp_cookie_preserve_param {
 	sctp_paramhdr_t param_hdr;
 	__be32          lifespan_increment;
-} __attribute__((packed)) sctp_cookie_preserve_param_t;
+} __packed sctp_cookie_preserve_param_t;
 
 /* Section 3.3.2.1 Host Name Address (11) */
 typedef struct sctp_hostname_param {
 	sctp_paramhdr_t param_hdr;
 	uint8_t hostname[0];
-} __attribute__((packed)) sctp_hostname_param_t;
+} __packed sctp_hostname_param_t;
 
 /* Section 3.3.2.1 Supported Address Types (12) */
 typedef struct sctp_supported_addrs_param {
 	sctp_paramhdr_t param_hdr;
 	__be16 types[0];
-} __attribute__((packed)) sctp_supported_addrs_param_t;
+} __packed sctp_supported_addrs_param_t;
 
 /* Appendix A. ECN Capable (32768) */
 typedef struct sctp_ecn_capable_param {
 	sctp_paramhdr_t param_hdr;
-} __attribute__((packed)) sctp_ecn_capable_param_t;
+} __packed sctp_ecn_capable_param_t;
 
 /* ADDIP Section 3.2.6 Adaptation Layer Indication */
 typedef struct sctp_adaptation_ind_param {
 	struct sctp_paramhdr param_hdr;
 	__be32 adaptation_ind;
-} __attribute__((packed)) sctp_adaptation_ind_param_t;
+} __packed sctp_adaptation_ind_param_t;
 
 /* ADDIP Section 4.2.7 Supported Extensions Parameter */
 typedef struct sctp_supported_ext_param {
 	struct sctp_paramhdr param_hdr;
 	__u8 chunks[0];
-} __attribute__((packed)) sctp_supported_ext_param_t;
+} __packed sctp_supported_ext_param_t;
 
 /* AUTH Section 3.1 Random */
 typedef struct sctp_random_param {
 	sctp_paramhdr_t param_hdr;
 	__u8 random_val[0];
-} __attribute__((packed)) sctp_random_param_t;
+} __packed sctp_random_param_t;
 
 /* AUTH Section 3.2 Chunk List */
 typedef struct sctp_chunks_param {
 	sctp_paramhdr_t param_hdr;
 	__u8 chunks[0];
-} __attribute__((packed)) sctp_chunks_param_t;
+} __packed sctp_chunks_param_t;
 
 /* AUTH Section 3.3 HMAC Algorithm */
 typedef struct sctp_hmac_algo_param {
 	sctp_paramhdr_t param_hdr;
 	__be16 hmac_ids[0];
-} __attribute__((packed)) sctp_hmac_algo_param_t;
+} __packed sctp_hmac_algo_param_t;
 
 /* RFC 2960.  Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2):
  *   The INIT ACK chunk is used to acknowledge the initiation of an SCTP
@@ -342,13 +342,13 @@ typedef sctp_init_chunk_t sctp_initack_chunk_t;
 typedef struct sctp_cookie_param {
 	sctp_paramhdr_t p;
 	__u8 body[0];
-} __attribute__((packed)) sctp_cookie_param_t;
+} __packed sctp_cookie_param_t;
 
 /* Section 3.3.3.1 Unrecognized Parameters (8) */
 typedef struct sctp_unrecognized_param {
 	sctp_paramhdr_t param_hdr;
 	sctp_paramhdr_t unrecognized;
-} __attribute__((packed)) sctp_unrecognized_param_t;
+} __packed sctp_unrecognized_param_t;
 
 
@@ -363,7 +363,7 @@ typedef struct sctp_unrecognized_param {
 typedef struct sctp_gap_ack_block {
 	__be16 start;
 	__be16 end;
-} __attribute__((packed)) sctp_gap_ack_block_t;
+} __packed sctp_gap_ack_block_t;
 
 typedef __be32 sctp_dup_tsn_t;
 
@@ -378,12 +378,12 @@ typedef struct sctp_sackhdr {
 	__be16 num_gap_ack_blocks;
 	__be16 num_dup_tsns;
 	sctp_sack_variable_t variable[0];
-} __attribute__((packed)) sctp_sackhdr_t;
+} __packed sctp_sackhdr_t;
 
 typedef struct sctp_sack_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_sackhdr_t sack_hdr;
-} __attribute__((packed)) sctp_sack_chunk_t;
+} __packed sctp_sack_chunk_t;
 
 
 /* RFC 2960.  Section 3.3.5 Heartbeat Request (HEARTBEAT) (4):
@@ -395,12 +395,12 @@ typedef struct sctp_sack_chunk {
 
 typedef struct sctp_heartbeathdr {
 	sctp_paramhdr_t info;
-} __attribute__((packed)) sctp_heartbeathdr_t;
+} __packed sctp_heartbeathdr_t;
 
 typedef struct sctp_heartbeat_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_heartbeathdr_t hb_hdr;
-} __attribute__((packed)) sctp_heartbeat_chunk_t;
+} __packed sctp_heartbeat_chunk_t;
 
 
 /* For the abort and shutdown ACK we must carry the init tag in the
@@ -409,7 +409,7 @@ typedef struct sctp_heartbeat_chunk {
  */
 typedef struct sctp_abort_chunk {
         sctp_chunkhdr_t uh;
-} __attribute__((packed)) sctp_abort_chunk_t;
+} __packed sctp_abort_chunk_t;
 
 
 /* For the graceful shutdown we must carry the tag (in common header)
@@ -417,12 +417,12 @@ typedef struct sctp_abort_chunk {
  */
 typedef struct sctp_shutdownhdr {
 	__be32 cum_tsn_ack;
-} __attribute__((packed)) sctp_shutdownhdr_t;
+} __packed sctp_shutdownhdr_t;
 
 struct sctp_shutdown_chunk_t {
         sctp_chunkhdr_t    chunk_hdr;
         sctp_shutdownhdr_t shutdown_hdr;
-} __attribute__ ((packed));
+} __packed;
 
 /* RFC 2960.  Section 3.3.10 Operation Error (ERROR) (9) */
 
@@ -430,12 +430,12 @@ typedef struct sctp_errhdr {
 	__be16 cause;
 	__be16 length;
 	__u8  variable[0];
-} __attribute__((packed)) sctp_errhdr_t;
+} __packed sctp_errhdr_t;
 
 typedef struct sctp_operr_chunk {
         sctp_chunkhdr_t chunk_hdr;
 	sctp_errhdr_t   err_hdr;
-} __attribute__((packed)) sctp_operr_chunk_t;
+} __packed sctp_operr_chunk_t;
 
 /* RFC 2960 3.3.10 - Operation Error
  *
@@ -525,7 +525,7 @@ typedef struct sctp_ecnehdr {
 typedef struct sctp_ecne_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_ecnehdr_t ence_hdr;
-} __attribute__((packed)) sctp_ecne_chunk_t;
+} __packed sctp_ecne_chunk_t;
 
 /* RFC 2960.  Appendix A.  Explicit Congestion Notification.
  *   Congestion Window Reduced (CWR) (13)
@@ -537,7 +537,7 @@ typedef struct sctp_cwrhdr {
 typedef struct sctp_cwr_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_cwrhdr_t cwr_hdr;
-} __attribute__((packed)) sctp_cwr_chunk_t;
+} __packed sctp_cwr_chunk_t;
 
 /* PR-SCTP
  * 3.2 Forward Cumulative TSN Chunk Definition (FORWARD TSN)
@@ -588,17 +588,17 @@ typedef struct sctp_cwr_chunk {
 struct sctp_fwdtsn_skip {
 	__be16 stream;
 	__be16 ssn;
-} __attribute__((packed));
+} __packed;
 
 struct sctp_fwdtsn_hdr {
 	__be32 new_cum_tsn;
 	struct sctp_fwdtsn_skip skip[0];
-} __attribute((packed));
+} __packed;
 
 struct sctp_fwdtsn_chunk {
 	struct sctp_chunkhdr chunk_hdr;
 	struct sctp_fwdtsn_hdr fwdtsn_hdr;
-} __attribute((packed));
+} __packed;
 
 
 /* ADDIP
@@ -636,17 +636,17 @@ struct sctp_fwdtsn_chunk {
 typedef struct sctp_addip_param {
 	sctp_paramhdr_t	param_hdr;
 	__be32		crr_id;
-} __attribute__((packed)) sctp_addip_param_t;
+} __packed sctp_addip_param_t;
 
 typedef struct sctp_addiphdr {
 	__be32	serial;
 	__u8	params[0];
-} __attribute__((packed)) sctp_addiphdr_t;
+} __packed sctp_addiphdr_t;
 
 typedef struct sctp_addip_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_addiphdr_t addip_hdr;
-} __attribute__((packed)) sctp_addip_chunk_t;
+} __packed sctp_addip_chunk_t;
 
 /* AUTH
  * Section 4.1  Authentication Chunk (AUTH)
@@ -701,11 +701,11 @@ typedef struct sctp_authhdr {
 	__be16 shkey_id;
 	__be16 hmac_id;
 	__u8   hmac[0];
-} __attribute__((packed)) sctp_authhdr_t;
+} __packed sctp_authhdr_t;
 
 typedef struct sctp_auth_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_authhdr_t auth_hdr;
-} __attribute__((packed)) sctp_auth_chunk_t;
+} __packed sctp_auth_chunk_t;
 
 #endif /* __LINUX_SCTP_H__ */
diff --git a/include/linux/wlp.h b/include/linux/wlp.h
index ac95ce6606ac..c76fe2392506 100644
--- a/include/linux/wlp.h
+++ b/include/linux/wlp.h
@@ -300,7 +300,7 @@ struct wlp_ie {
 	__le16 cycle_param;
 	__le16 acw_anchor_addr;
 	u8 wssid_hash_list[];
-} __attribute__((packed));
+} __packed;
 
 static inline int wlp_ie_hash_length(struct wlp_ie *ie)
 {
@@ -324,7 +324,7 @@ static inline void wlp_ie_set_hash_length(struct wlp_ie *ie, int hash_length)
  */
 struct wlp_nonce {
 	u8 data[16];
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP UUID
@@ -336,7 +336,7 @@ struct wlp_nonce {
  */
 struct wlp_uuid {
 	u8 data[16];
-} __attribute__((packed));
+} __packed;
 
 
 /**
@@ -348,7 +348,7 @@ struct wlp_dev_type {
 	u8 OUI[3];
 	u8 OUIsubdiv;
 	__le16 subID;
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP frame header
@@ -357,7 +357,7 @@ struct wlp_dev_type {
 struct wlp_frame_hdr {
 	__le16 mux_hdr;			/* WLP_PROTOCOL_ID */
 	enum wlp_frame_type type:8;
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP attribute field header
@@ -368,7 +368,7 @@ struct wlp_frame_hdr {
 struct wlp_attr_hdr {
 	__le16 type;
 	__le16 length;
-} __attribute__((packed));
+} __packed;
 
 /**
  * Device information commonly used together
@@ -401,13 +401,13 @@ struct wlp_device_info {
 struct wlp_attr_##name {						\
 	struct wlp_attr_hdr hdr;					\
 	type name;							\
-} __attribute__((packed));
+} __packed;
 
 #define wlp_attr_array(type, name)					\
 struct wlp_attr_##name {						\
 	struct wlp_attr_hdr hdr;					\
 	type name[];							\
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP association attribute fields
@@ -483,7 +483,7 @@ struct wlp_wss_info {
 	struct wlp_attr_accept_enrl accept;
 	struct wlp_attr_wss_sec_status sec_stat;
 	struct wlp_attr_wss_bcast bcast;
-} __attribute__((packed));
+} __packed;
 
 /* WLP WSS Information */
 wlp_attr_array(struct wlp_wss_info, wss_info)
@@ -520,7 +520,7 @@ wlp_attr(u8, wlp_assc_err)
 struct wlp_frame_std_abbrv_hdr {
 	struct wlp_frame_hdr hdr;
 	u8 tag;
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP association frames
@@ -533,7 +533,7 @@ struct wlp_frame_assoc {
 	struct wlp_attr_version version;
 	struct wlp_attr_msg_type msg_type;
 	u8 attr[];
-} __attribute__((packed));
+} __packed;
 
 /* Ethernet to dev address mapping */
 struct wlp_eda {
diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h
index 511a459ec10f..0916bbf3bdff 100644
--- a/include/net/dn_dev.h
+++ b/include/net/dn_dev.h
@@ -101,7 +101,7 @@ struct dn_short_packet {
 	__le16 dstnode;
 	__le16 srcnode;
 	__u8   forward;
-} __attribute__((packed));
+} __packed;
 
 struct dn_long_packet {
 	__u8   msgflg;
@@ -115,7 +115,7 @@ struct dn_long_packet {
 	__u8   visit_ct;
 	__u8   s_class;
 	__u8   pt;
-} __attribute__((packed));
+} __packed;
 
 /*------------------------- DRP - Routing messages ---------------------*/
 
@@ -132,7 +132,7 @@ struct endnode_hello_message {
 	__u8   mpd;
 	__u8   datalen;
 	__u8   data[2];
-} __attribute__((packed));
+} __packed;
 
 struct rtnode_hello_message {
 	__u8   msgflg;
@@ -144,7 +144,7 @@ struct rtnode_hello_message {
 	__u8   area;
 	__le16  timer;
 	__u8   mpd;
-} __attribute__((packed));
+} __packed;
 
 
 extern void dn_dev_init(void);
diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h
index 17d43d2db5ec..e43a2893f132 100644
--- a/include/net/dn_nsp.h
+++ b/include/net/dn_nsp.h
@@ -74,18 +74,18 @@ struct nsp_data_seg_msg {
 	__u8   msgflg;
 	__le16 dstaddr;
 	__le16 srcaddr;
-} __attribute__((packed));
+} __packed;
 
 struct nsp_data_opt_msg {
 	__le16 acknum;
 	__le16 segnum;
 	__le16 lsflgs;
-} __attribute__((packed));
+} __packed;
 
 struct nsp_data_opt_msg1 {
 	__le16 acknum;
 	__le16 segnum;
-} __attribute__((packed));
+} __packed;
 
 
 /* Acknowledgment Message (data/other data)                             */
@@ -94,13 +94,13 @@ struct nsp_data_ack_msg {
 	__le16 dstaddr;
 	__le16 srcaddr;
 	__le16 acknum;
-} __attribute__((packed));
+} __packed;
 
 /* Connect Acknowledgment Message */
 struct  nsp_conn_ack_msg {
 	__u8 msgflg;
 	__le16 dstaddr;
-} __attribute__((packed));
+} __packed;
 
 
 /* Connect Initiate/Retransmit Initiate/Connect Confirm */
@@ -117,7 +117,7 @@ struct  nsp_conn_init_msg {
 #define NSP_FC_MASK   0x0c            /* FC type mask         */
 	__u8   info;
 	__le16 segsize;
-} __attribute__((packed));
+} __packed;
 
 /* Disconnect Initiate/Disconnect Confirm */
 struct  nsp_disconn_init_msg {
@@ -125,7 +125,7 @@ struct  nsp_disconn_init_msg {
 	__le16 dstaddr;
 	__le16 srcaddr;
 	__le16 reason;
-} __attribute__((packed));
+} __packed;
 
 
@@ -135,7 +135,7 @@ struct  srcobj_fmt {
 	__le16 grpcode;
 	__le16 usrcode;
 	__u8   dlen;
-} __attribute__((packed));
+} __packed;
 
 /*
  * A collection of functions for manipulating the sequence
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index fbf9d1cda27b..fc94ec568a50 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -27,6 +27,6 @@ struct ipv6_tlv_tnl_enc_lim {
 	__u8 type;		/* type-code for option         */
 	__u8 length;		/* option length                */
 	__u8 encap_limit;	/* tunnel encapsulation limit   */
-} __attribute__ ((packed));
+} __packed;
 
 #endif
diff --git a/include/net/ipx.h b/include/net/ipx.h
index ef51a668ba19..05d7e4a88b49 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -27,9 +27,9 @@ struct ipx_address {
 #define IPX_MAX_PPROP_HOPS 8
 
 struct ipxhdr {
-	__be16			ipx_checksum __attribute__ ((packed));
+	__be16			ipx_checksum __packed;
 #define IPX_NO_CHECKSUM	cpu_to_be16(0xFFFF)
-	__be16			ipx_pktsize __attribute__ ((packed));
+	__be16			ipx_pktsize __packed;
 	__u8			ipx_tctrl;
 	__u8			ipx_type;
 #define IPX_TYPE_UNKNOWN	0x00
@@ -38,8 +38,8 @@ struct ipxhdr {
 #define IPX_TYPE_SPX		0x05	/* SPX protocol */
 #define IPX_TYPE_NCP		0x11	/* $lots for docs on this (SPIT) */
 #define IPX_TYPE_PPROP		0x14	/* complicated flood fill brdcast */
-	struct ipx_address	ipx_dest __attribute__ ((packed));
-	struct ipx_address	ipx_source __attribute__ ((packed));
+	struct ipx_address	ipx_dest __packed;
+	struct ipx_address	ipx_source __packed;
 };
 
 static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb)
diff --git a/include/net/mip6.h b/include/net/mip6.h
index a83ad1982a90..26ba99b5a4b1 100644
--- a/include/net/mip6.h
+++ b/include/net/mip6.h
@@ -39,7 +39,7 @@ struct ip6_mh {
 	__u16	ip6mh_cksum;
 	/* Followed by type specific messages */
 	__u8	data[0];
-} __attribute__ ((__packed__));
+} __packed;
 
 #define IP6_MH_TYPE_BRR		0   /* Binding Refresh Request */
 #define IP6_MH_TYPE_HOTI	1   /* HOTI Message   */
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index f76f22d05721..895997bc2ead 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -82,7 +82,7 @@ struct ra_msg {
 struct nd_opt_hdr {
 	__u8		nd_opt_type;
 	__u8		nd_opt_len;
-} __attribute__((__packed__));
+} __packed;
 
 
 extern int			ndisc_init(void);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 4b860116e096..f9e7473613bd 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -443,7 +443,7 @@ struct sctp_signed_cookie {
 	__u8 signature[SCTP_SECRET_SIZE];
 	__u32 __pad;		/* force sctp_cookie alignment to 64 bits */
 	struct sctp_cookie c;
-} __attribute__((packed));
+} __packed;
 
 /* This is another convenience type to allocate memory for address
  * params for the maximum size and pass such structures around
@@ -488,7 +488,7 @@ typedef struct sctp_sender_hb_info {
 	union sctp_addr daddr;
 	unsigned long sent_at;
 	__u64 hb_nonce;
-} __attribute__((packed)) sctp_sender_hb_info_t;
+} __packed sctp_sender_hb_info_t;
 
 /*
  *  RFC 2960 1.3.2 Sequenced Delivery within Streams
diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h
index b69e6e173ea1..9b2c30897e50 100644
--- a/include/rxrpc/packet.h
+++ b/include/rxrpc/packet.h
@@ -65,7 +65,7 @@ struct rxrpc_header {
 	};
 	__be16		serviceId;	/* service ID */
 
-} __attribute__((packed));
+} __packed;
 
 #define __rxrpc_header_off(X) offsetof(struct rxrpc_header,X)
 
@@ -120,7 +120,7 @@ struct rxrpc_ackpacket {
 #define RXRPC_ACK_TYPE_NACK		0
 #define RXRPC_ACK_TYPE_ACK		1
 
-} __attribute__((packed));
+} __packed;
 
 /*
  * ACK packets can have a further piece of information tagged on the end
@@ -141,7 +141,7 @@ struct rxkad_challenge {
 	__be32		nonce;		/* encrypted random number */
 	__be32		min_level;	/* minimum security level */
 	__be32		__padding;	/* padding to 8-byte boundary */
-} __attribute__((packed));
+} __packed;
 
 /*****************************************************************************/
 /*
@@ -164,7 +164,7 @@ struct rxkad_response {
 
 	__be32		kvno;		/* Kerberos key version number */
 	__be32		ticket_len;	/* Kerberos ticket length  */
-} __attribute__((packed));
+} __packed;
 
 /*****************************************************************************/
 /*
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h
index 0d9e506f5d5a..70672544db86 100644
--- a/net/bluetooth/bnep/bnep.h
+++ b/net/bluetooth/bnep/bnep.h
@@ -86,26 +86,26 @@ struct bnep_setup_conn_req {
 	__u8  ctrl;
 	__u8  uuid_size;
 	__u8  service[0];
-} __attribute__((packed));
+} __packed;
 
 struct bnep_set_filter_req {
 	__u8  type;
 	__u8  ctrl;
 	__be16 len;
 	__u8  list[0];
-} __attribute__((packed));
+} __packed;
 
 struct bnep_control_rsp {
 	__u8  type;
 	__u8  ctrl;
 	__be16 resp;
-} __attribute__((packed));
+} __packed;
 
 struct bnep_ext_hdr {
 	__u8  type;
 	__u8  len;
 	__u8  data[0];
-} __attribute__((packed));
+} __packed;
 
 /* BNEP ioctl defines */
 #define BNEPCONNADD	_IOW('B', 200, int)
diff --git a/net/compat.c b/net/compat.c
index ec24d9edb025..1cf75905f132 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -531,7 +531,7 @@ struct compat_group_req {
 	__u32				 gr_interface;
 	struct __kernel_sockaddr_storage gr_group
 		__attribute__ ((aligned(4)));
-} __attribute__ ((packed));
+} __packed;
 
 struct compat_group_source_req {
 	__u32				 gsr_interface;
@@ -539,7 +539,7 @@ struct compat_group_source_req {
 		__attribute__ ((aligned(4)));
 	struct __kernel_sockaddr_storage gsr_source
 		__attribute__ ((aligned(4)));
-} __attribute__ ((packed));
+} __packed;
 
 struct compat_group_filter {
 	__u32				 gf_interface;
@@ -549,7 +549,7 @@ struct compat_group_filter {
 	__u32				 gf_numsrc;
 	struct __kernel_sockaddr_storage gf_slist[1]
 		__attribute__ ((aligned(4)));
-} __attribute__ ((packed));
+} __packed;
 
 #define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \
 			sizeof(struct __kernel_sockaddr_storage))
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index f28ad2cc8428..499c045d6910 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1463,7 +1463,7 @@ struct iucv_path_pending {
 	u32 res3;
 	u8  ippollfg;
 	u8  res4[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_pending(struct iucv_irq_data *data)
 {
@@ -1524,7 +1524,7 @@ struct iucv_path_complete {
 	u32 res3;
 	u8  ippollfg;
 	u8  res4[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_complete(struct iucv_irq_data *data)
 {
@@ -1554,7 +1554,7 @@ struct iucv_path_severed {
 	u32 res4;
 	u8  ippollfg;
 	u8  res5[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_severed(struct iucv_irq_data *data)
 {
@@ -1590,7 +1590,7 @@ struct iucv_path_quiesced {
 	u32 res4;
 	u8  ippollfg;
 	u8  res5[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_quiesced(struct iucv_irq_data *data)
 {
@@ -1618,7 +1618,7 @@ struct iucv_path_resumed {
 	u32 res4;
 	u8  ippollfg;
 	u8  res5[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_resumed(struct iucv_irq_data *data)
 {
@@ -1649,7 +1649,7 @@ struct iucv_message_complete {
 	u32 ipbfln2f;
 	u8  ippollfg;
 	u8  res2[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_message_complete(struct iucv_irq_data *data)
 {
@@ -1694,7 +1694,7 @@ struct iucv_message_pending {
 	u32 ipbfln2f;
 	u8  ippollfg;
 	u8  res2[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_message_pending(struct iucv_irq_data *data)
 {
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c7000a6ca379..a2ed0f7b5568 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -600,7 +600,7 @@ struct iapp_layer2_update {
 	u8 ssap;		/* 0 */
 	u8 control;
 	u8 xid_info[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void ieee80211_send_layer2_update(struct sta_info *sta)
 {
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 1a9e2da37a93..ec3e5c3e27bd 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1084,7 +1084,7 @@ struct ieee80211_tx_status_rtap_hdr {
 	u8 padding_for_rate;
 	__le16 tx_flags;
 	u8 data_retries;
-} __attribute__ ((packed));
+} __packed;
 
 
 /* HT */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6e2a7bcd8cb8..2d9a2ee94e17 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2139,7 +2139,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
 		u8 rate_or_pad;
 		__le16 chan_freq;
 		__le16 chan_flags;
-	} __attribute__ ((packed)) *rthdr;
+	} __packed *rthdr;
 	struct sk_buff *skb = rx->skb, *skb2;
 	struct net_device *prev_dev = NULL;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index bd2a50b482ac..246f92924658 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1817,7 +1817,7 @@ malformed:
 struct __sctp_missing {
 	__be32 num_missing;
 	__be16 type;
-}  __attribute__((packed));
+}  __packed;
 
 /*
  * Report a missing mandatory parameter.
-- 
cgit v1.2.3-55-g7522


From 8764ab2ca7ab5055e1ca80f9cfa4970c34acb804 Mon Sep 17 00:00:00 2001
From: Steffen Klassert
Date: Fri, 4 Jun 2010 01:57:38 +0000
Subject: net: check for refcount if pop a stacked dst_entry

xfrm triggers a warning if dst_pop() drops a refcount
on a noref dst. This patch changes dst_pop() to
skb_dst_pop(). skb_dst_pop() drops the refcnt only
on a refcounted dst. Also we don't clone the child
dst_entry, so it is not refcounted and we can use
skb_dst_set_noref() in xfrm_output_one().

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h      | 6 +++---
 net/xfrm/xfrm_output.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dst.h b/include/net/dst.h
index 612069beda73..81d1413a8701 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -250,11 +250,11 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
  * Linux networking.  Thus, destinations are stackable.
  */
 
-static inline struct dst_entry *dst_pop(struct dst_entry *dst)
+static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb)
 {
-	struct dst_entry *child = dst_clone(dst->child);
+	struct dst_entry *child = skb_dst(skb)->child;
 
-	dst_release(dst);
+	skb_dst_drop(skb);
 	return child;
 }
 
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 6a329158bdfa..a3cca0a94346 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -95,13 +95,13 @@ resume:
 			goto error_nolock;
 		}
 
-		dst = dst_pop(dst);
+		dst = skb_dst_pop(skb);
 		if (!dst) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
 			err = -EHOSTUNREACH;
 			goto error_nolock;
 		}
-		skb_dst_set(skb, dst);
+		skb_dst_set_noref(skb, dst);
 		x = dst->xfrm;
 	} while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
 
-- 
cgit v1.2.3-55-g7522


From a8b690f98baf9fb1902b8eeab801351ea603fa3a Mon Sep 17 00:00:00 2001
From: Tom Herbert
Date: Mon, 7 Jun 2010 00:43:42 -0700
Subject: tcp: Fix slowness in read /proc/net/tcp

This patch address a serious performance issue in reading the
TCP sockets table (/proc/net/tcp).

Reading the full table is done by a number of sequential read
operations.  At each read operation, a seek is done to find the
last socket that was previously read.  This seek operation requires
that the sockets in the table need to be counted up to the current
file position, and to count each of these requires taking a lock for
each non-empty bucket.  The whole algorithm is O(n^2).

The fix is to cache the last bucket value, offset within the bucket,
and the file position returned by the last read operation.   On the
next sequential read, the bucket and offset are used to find the
last read socket immediately without needing ot scan the previous
buckets  the table.  This algorithm t read the whole table is O(n).

The improvement offered by this patch is easily show by performing
cat'ing /proc/net/tcp on a machine with a lot of connections.  With
about 182K connections in the table, I see the following:

- Without patch
time cat /proc/net/tcp > /dev/null

real	1m56.729s
user	0m0.214s
sys	1m56.344s

- With patch
time cat /proc/net/tcp > /dev/null

real	0m0.894s
user	0m0.290s
sys	0m0.594s

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h   |  3 +-
 net/ipv4/tcp_ipv4.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 86 insertions(+), 9 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a1449144848a..573166484413 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1413,7 +1413,8 @@ struct tcp_iter_state {
 	sa_family_t		family;
 	enum tcp_seq_states	state;
 	struct sock		*syn_wait_sk;
-	int			bucket, sbucket, num, uid;
+	int			bucket, offset, sbucket, num, uid;
+	loff_t			last_pos;
 };
 
 extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index acdc4c989853..7f976af27bf0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1980,6 +1980,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
 }
 
+/*
+ * Get next listener socket follow cur.  If cur is NULL, get first socket
+ * starting from bucket given in st->bucket; when st->bucket is zero the
+ * very first socket in the hash table is returned.
+ */
 static void *listening_get_next(struct seq_file *seq, void *cur)
 {
 	struct inet_connection_sock *icsk;
@@ -1990,14 +1995,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 	struct net *net = seq_file_net(seq);
 
 	if (!sk) {
-		st->bucket = 0;
-		ilb = &tcp_hashinfo.listening_hash[0];
+		ilb = &tcp_hashinfo.listening_hash[st->bucket];
 		spin_lock_bh(&ilb->lock);
 		sk = sk_nulls_head(&ilb->head);
+		st->offset = 0;
 		goto get_sk;
 	}
 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
 	++st->num;
+	++st->offset;
 
 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
 		struct request_sock *req = cur;
@@ -2012,6 +2018,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 				}
 				req = req->dl_next;
 			}
+			st->offset = 0;
 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
 				break;
 get_req:
@@ -2047,6 +2054,7 @@ start_req:
 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 	}
 	spin_unlock_bh(&ilb->lock);
+	st->offset = 0;
 	if (++st->bucket < INET_LHTABLE_SIZE) {
 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
 		spin_lock_bh(&ilb->lock);
@@ -2060,7 +2068,12 @@ out:
 
 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 {
-	void *rc = listening_get_next(seq, NULL);
+	struct tcp_iter_state *st = seq->private;
+	void *rc;
+
+	st->bucket = 0;
+	st->offset = 0;
+	rc = listening_get_next(seq, NULL);
 
 	while (rc && *pos) {
 		rc = listening_get_next(seq, rc);
@@ -2075,13 +2088,18 @@ static inline int empty_bucket(struct tcp_iter_state *st)
 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
 }
 
+/*
+ * Get first established socket starting from bucket given in st->bucket.
+ * If st->bucket is zero, the very first socket in the hash is returned.
+ */
 static void *established_get_first(struct seq_file *seq)
 {
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 	void *rc = NULL;
 
-	for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
+	st->offset = 0;
+	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
 		struct sock *sk;
 		struct hlist_nulls_node *node;
 		struct inet_timewait_sock *tw;
@@ -2126,6 +2144,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
 	struct net *net = seq_file_net(seq);
 
 	++st->num;
+	++st->offset;
 
 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
 		tw = cur;
@@ -2142,6 +2161,7 @@ get_tw:
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 
 		/* Look for next non empty bucket */
+		st->offset = 0;
 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
 				empty_bucket(st))
 			;
@@ -2169,7 +2189,11 @@ out:
 
 static void *established_get_idx(struct seq_file *seq, loff_t pos)
 {
-	void *rc = established_get_first(seq);
+	struct tcp_iter_state *st = seq->private;
+	void *rc;
+
+	st->bucket = 0;
+	rc = established_get_first(seq);
 
 	while (rc && pos) {
 		rc = established_get_next(seq, rc);
@@ -2194,24 +2218,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
 	return rc;
 }
 
+static void *tcp_seek_last_pos(struct seq_file *seq)
+{
+	struct tcp_iter_state *st = seq->private;
+	int offset = st->offset;
+	int orig_num = st->num;
+	void *rc = NULL;
+
+	switch (st->state) {
+	case TCP_SEQ_STATE_OPENREQ:
+	case TCP_SEQ_STATE_LISTENING:
+		if (st->bucket >= INET_LHTABLE_SIZE)
+			break;
+		st->state = TCP_SEQ_STATE_LISTENING;
+		rc = listening_get_next(seq, NULL);
+		while (offset-- && rc)
+			rc = listening_get_next(seq, rc);
+		if (rc)
+			break;
+		st->bucket = 0;
+		/* Fallthrough */
+	case TCP_SEQ_STATE_ESTABLISHED:
+	case TCP_SEQ_STATE_TIME_WAIT:
+		st->state = TCP_SEQ_STATE_ESTABLISHED;
+		if (st->bucket > tcp_hashinfo.ehash_mask)
+			break;
+		rc = established_get_first(seq);
+		while (offset-- && rc)
+			rc = established_get_next(seq, rc);
+	}
+
+	st->num = orig_num;
+
+	return rc;
+}
+
 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct tcp_iter_state *st = seq->private;
+	void *rc;
+
+	if (*pos && *pos == st->last_pos) {
+		rc = tcp_seek_last_pos(seq);
+		if (rc)
+			goto out;
+	}
+
 	st->state = TCP_SEQ_STATE_LISTENING;
 	st->num = 0;
-	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+	st->bucket = 0;
+	st->offset = 0;
+	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+
+out:
+	st->last_pos = *pos;
+	return rc;
 }
 
 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	struct tcp_iter_state *st = seq->private;
 	void *rc = NULL;
-	struct tcp_iter_state *st;
 
 	if (v == SEQ_START_TOKEN) {
 		rc = tcp_get_idx(seq, 0);
 		goto out;
 	}
-	st = seq->private;
 
 	switch (st->state) {
 	case TCP_SEQ_STATE_OPENREQ:
@@ -2219,6 +2291,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		rc = listening_get_next(seq, v);
 		if (!rc) {
 			st->state = TCP_SEQ_STATE_ESTABLISHED;
+			st->bucket = 0;
+			st->offset = 0;
 			rc	  = established_get_first(seq);
 		}
 		break;
@@ -2229,6 +2303,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	}
 out:
 	++*pos;
+	st->last_pos = *pos;
 	return rc;
 }
 
@@ -2267,6 +2342,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
 
 	s = ((struct seq_file *)file->private_data)->private;
 	s->family		= afinfo->family;
+	s->last_pos 		= 0;
 	return 0;
 }
 
-- 
cgit v1.2.3-55-g7522


From 66018506e15bea62de4eefc3298f170b4bfcf5ef Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Mon, 7 Jun 2010 03:12:08 +0000
Subject: ip: Router Alert RCU conversion

Straightforward conversion to RCU.

One rwlock becomes a spinlock, and is static.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       |  2 +-
 net/ipv4/ip_input.c    | 11 +++--------
 net/ipv4/ip_sockglue.c | 23 ++++++++++++++---------
 3 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 452f229c380a..9982c97f0bdc 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -62,10 +62,10 @@ struct ip_ra_chain {
 	struct ip_ra_chain	*next;
 	struct sock		*sk;
 	void			(*destructor)(struct sock *);
+	struct rcu_head		rcu;
 };
 
 extern struct ip_ra_chain *ip_ra_chain;
-extern rwlock_t ip_ra_lock;
 
 /* IP flags. */
 #define IP_CE		0x8000		/* Flag: "Congestion"		*/
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d52c9da644cf..d274078b1665 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -146,7 +146,7 @@
 #include <linux/netlink.h>
 
 /*
- *	Process Router Attention IP option
+ *	Process Router Attention IP option (RFC 2113)
  */
 int ip_call_ra_chain(struct sk_buff *skb)
 {
@@ -155,8 +155,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 	struct sock *last = NULL;
 	struct net_device *dev = skb->dev;
 
-	read_lock(&ip_ra_lock);
-	for (ra = ip_ra_chain; ra; ra = ra->next) {
+	for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
 		struct sock *sk = ra->sk;
 
 		/* If socket is bound to an interface, only report
@@ -167,10 +166,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
 		     sk->sk_bound_dev_if == dev->ifindex) &&
 		    net_eq(sock_net(sk), dev_net(dev))) {
 			if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
-					read_unlock(&ip_ra_lock);
+				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN))
 					return 1;
-				}
 			}
 			if (last) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -183,10 +180,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
 
 	if (last) {
 		raw_rcv(last, skb);
-		read_unlock(&ip_ra_lock);
 		return 1;
 	}
-	read_unlock(&ip_ra_lock);
 	return 0;
 }
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ce231780a2b1..08b9519a24f4 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -239,7 +239,12 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
    sent to multicast group to reach destination designated router.
  */
 struct ip_ra_chain *ip_ra_chain;
-DEFINE_RWLOCK(ip_ra_lock);
+static DEFINE_SPINLOCK(ip_ra_lock);
+
+static void ip_ra_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct ip_ra_chain, rcu));
+}
 
 int ip_ra_control(struct sock *sk, unsigned char on,
 		  void (*destructor)(struct sock *))
@@ -251,35 +256,35 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 
 	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 
-	write_lock_bh(&ip_ra_lock);
+	spin_lock_bh(&ip_ra_lock);
 	for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
 		if (ra->sk == sk) {
 			if (on) {
-				write_unlock_bh(&ip_ra_lock);
+				spin_unlock_bh(&ip_ra_lock);
 				kfree(new_ra);
 				return -EADDRINUSE;
 			}
-			*rap = ra->next;
-			write_unlock_bh(&ip_ra_lock);
+			rcu_assign_pointer(*rap, ra->next);
+			spin_unlock_bh(&ip_ra_lock);
 
 			if (ra->destructor)
 				ra->destructor(sk);
 			sock_put(sk);
-			kfree(ra);
+			call_rcu(&ra->rcu, ip_ra_free_rcu);
 			return 0;
 		}
 	}
 	if (new_ra == NULL) {
-		write_unlock_bh(&ip_ra_lock);
+		spin_unlock_bh(&ip_ra_lock);
 		return -ENOBUFS;
 	}
 	new_ra->sk = sk;
 	new_ra->destructor = destructor;
 
 	new_ra->next = ra;
-	*rap = new_ra;
+	rcu_assign_pointer(*rap, new_ra);
 	sock_hold(sk);
-	write_unlock_bh(&ip_ra_lock);
+	spin_unlock_bh(&ip_ra_lock);
 
 	return 0;
 }
-- 
cgit v1.2.3-55-g7522


From 339bb99e4a8ba1f8960eed21d50be808b35ad22a Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 8 Jun 2010 14:11:19 +0200
Subject: netfilter: xt_rateest: Better struct xt_rateest layout

We currently dirty two cache lines in struct xt_rateest, this hurts SMP
performance.

This patch moves lock/bstats/rstats at beginning of structure so that
they share a single cache line.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/xt_rateest.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index ddbf37e19616..b1d780e21ce3 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -2,13 +2,17 @@
 #define _XT_RATEEST_H
 
 struct xt_rateest {
+	/* keep lock and bstats on same cache line to speedup xt_rateest_tg() */
+	struct gnet_stats_basic_packed	bstats;
+	spinlock_t			lock;
+	/* keep rstats and lock on same cache line to speedup xt_rateest_mt() */
+	struct gnet_stats_rate_est	rstats;
+
+	/* following fields not accessed in hot path */
 	struct hlist_node		list;
 	char				name[IFNAMSIZ];
 	unsigned int			refcnt;
-	spinlock_t			lock;
 	struct gnet_estimator		params;
-	struct gnet_stats_rate_est	rstats;
-	struct gnet_stats_basic_packed	bstats;
 };
 
 extern struct xt_rateest *xt_rateest_lookup(const char *name);
-- 
cgit v1.2.3-55-g7522


From 5bfddbd46a95c978f4d3c992339cbdf4f4b790a3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 8 Jun 2010 16:09:52 +0200
Subject: netfilter: nf_conntrack: IPS_UNTRACKED bit

NOTRACK makes all cpus share a cache line on nf_conntrack_untracked
twice per packet. This is bad for performance.
__read_mostly annotation is also a bad choice.

This patch introduces IPS_UNTRACKED bit so that we can use later a
per_cpu untrack structure more easily.

A new helper, nf_ct_untracked_get() returns a pointer to
nf_conntrack_untracked.

Another one, nf_ct_untracked_status_or() is used by nf_nat_init() to add
IPS_NAT_DONE_MASK bits to untracked status.

nf_ct_is_untracked() prototype is changed to work on a nf_conn pointer.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_common.h  |  4 ++++
 include/net/netfilter/nf_conntrack.h           | 12 +++++++++---
 include/net/netfilter/nf_conntrack_core.h      |  2 +-
 net/ipv4/netfilter/nf_nat_core.c               |  2 +-
 net/ipv4/netfilter/nf_nat_standalone.c         |  2 +-
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  2 +-
 net/netfilter/nf_conntrack_core.c              | 11 ++++++++---
 net/netfilter/nf_conntrack_netlink.c           |  2 +-
 net/netfilter/xt_CT.c                          |  4 ++--
 net/netfilter/xt_NOTRACK.c                     |  2 +-
 net/netfilter/xt_TEE.c                         |  4 ++--
 net/netfilter/xt_cluster.c                     |  2 +-
 net/netfilter/xt_conntrack.c                   | 11 ++++++-----
 net/netfilter/xt_socket.c                      |  2 +-
 net/netfilter/xt_state.c                       | 14 ++++++++------
 15 files changed, 47 insertions(+), 29 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 14e6d32002c4..1afd18c855ec 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -76,6 +76,10 @@ enum ip_conntrack_status {
 	/* Conntrack is a template */
 	IPS_TEMPLATE_BIT = 11,
 	IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT),
+
+	/* Conntrack is a fake untracked entry */
+	IPS_UNTRACKED_BIT = 12,
+	IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
 };
 
 /* Connection tracking event types */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index bde095f7e845..3bc38c70bbbe 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -261,7 +261,13 @@ extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 			       u32 seq);
 
 /* Fake conntrack entry for untracked connections */
-extern struct nf_conn nf_conntrack_untracked;
+static inline struct nf_conn *nf_ct_untracked_get(void)
+{
+	extern struct nf_conn nf_conntrack_untracked;
+
+	return &nf_conntrack_untracked;
+}
+extern void nf_ct_untracked_status_or(unsigned long bits);
 
 /* Iterate over all conntracks: if iter returns true, it's deleted. */
 extern void
@@ -289,9 +295,9 @@ static inline int nf_ct_is_dying(struct nf_conn *ct)
 	return test_bit(IPS_DYING_BIT, &ct->status);
 }
 
-static inline int nf_ct_is_untracked(const struct sk_buff *skb)
+static inline int nf_ct_is_untracked(const struct nf_conn *ct)
 {
-	return (skb->nfct == &nf_conntrack_untracked.ct_general);
+	return test_bit(IPS_UNTRACKED_BIT, &ct->status);
 }
 
 extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 3d7524fba194..aced085132e7 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -60,7 +60,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
 	struct nf_conn *ct = (struct nf_conn *)skb->nfct;
 	int ret = NF_ACCEPT;
 
-	if (ct && ct != &nf_conntrack_untracked) {
+	if (ct && !nf_ct_is_untracked(ct)) {
 		if (!nf_ct_is_confirmed(ct))
 			ret = __nf_conntrack_confirm(skb);
 		if (likely(ret == NF_ACCEPT))
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 4f8bddb760c9..c7719b283ada 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -742,7 +742,7 @@ static int __init nf_nat_init(void)
 	spin_unlock_bh(&nf_nat_lock);
 
 	/* Initialize fake conntrack so that NAT will skip it */
-	nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+	nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
 
 	l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
 
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index beb25819c9c9..6723c682250d 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum,
 		return NF_ACCEPT;
 
 	/* Don't try to NAT if this packet is not conntracked */
-	if (ct == &nf_conntrack_untracked)
+	if (nf_ct_is_untracked(ct))
 		return NF_ACCEPT;
 
 	nat = nfct_nat(ct);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 9be81776415e..1df3c8b6bf47 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -208,7 +208,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
 	type = icmp6h->icmp6_type - 130;
 	if (type >= 0 && type < sizeof(noct_valid_new) &&
 	    noct_valid_new[type]) {
-		skb->nfct = &nf_conntrack_untracked.ct_general;
+		skb->nfct = &nf_ct_untracked_get()->ct_general;
 		skb->nfctinfo = IP_CT_NEW;
 		nf_conntrack_get(skb->nfct);
 		return NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index eeeb8bc73982..6c1da212380d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 unsigned int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
 
-struct nf_conn nf_conntrack_untracked __read_mostly;
+struct nf_conn nf_conntrack_untracked;
 EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 
 static int nf_conntrack_hash_rnd_initted;
@@ -1321,6 +1321,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 		  &nf_conntrack_htable_size, 0600);
 
+void nf_ct_untracked_status_or(unsigned long bits)
+{
+	nf_conntrack_untracked.status |= bits;
+}
+EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
+
 static int nf_conntrack_init_init_net(void)
 {
 	int max_factor = 8;
@@ -1368,8 +1374,7 @@ static int nf_conntrack_init_init_net(void)
 #endif
 	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
 	/*  - and look it like as a confirmed connection */
-	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
-
+	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
 	return 0;
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c42ff6aa441d..5bae1cd15eea 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -480,7 +480,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	int err;
 
 	/* ignore our fake conntrack entry */
-	if (ct == &nf_conntrack_untracked)
+	if (nf_ct_is_untracked(ct))
 		return 0;
 
 	if (events & (1 << IPCT_DESTROY)) {
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 562bf3266e04..0cb6053f02fd 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -67,7 +67,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 		return -EINVAL;
 
 	if (info->flags & XT_CT_NOTRACK) {
-		ct = &nf_conntrack_untracked;
+		ct = nf_ct_untracked_get();
 		atomic_inc(&ct->ct_general.use);
 		goto out;
 	}
@@ -132,7 +132,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
 	struct nf_conn *ct = info->ct;
 	struct nf_conn_help *help;
 
-	if (ct != &nf_conntrack_untracked) {
+	if (!nf_ct_is_untracked(ct)) {
 		help = nfct_help(ct);
 		if (help)
 			module_put(help->helper->me);
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 512b9123252f..9d782181b6c8 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -23,7 +23,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	   If there is a real ct entry correspondig to this packet,
 	   it'll hang aroun till timing out. We don't deal with it
 	   for performance reasons. JK */
-	skb->nfct = &nf_conntrack_untracked.ct_general;
+	skb->nfct = &nf_ct_untracked_get()->ct_general;
 	skb->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get(skb->nfct);
 
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 859d9fd429c8..7a118267c4c4 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -104,7 +104,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 #ifdef WITH_CONNTRACK
 	/* Avoid counting cloned packets towards the original connection. */
 	nf_conntrack_put(skb->nfct);
-	skb->nfct     = &nf_conntrack_untracked.ct_general;
+	skb->nfct     = &nf_ct_untracked_get()->ct_general;
 	skb->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get(skb->nfct);
 #endif
@@ -177,7 +177,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 
 #ifdef WITH_CONNTRACK
 	nf_conntrack_put(skb->nfct);
-	skb->nfct     = &nf_conntrack_untracked.ct_general;
+	skb->nfct     = &nf_ct_untracked_get()->ct_general;
 	skb->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get(skb->nfct);
 #endif
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 30b95a1c1c89..f4af1bfafb1c 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -120,7 +120,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	if (ct == NULL)
 		return false;
 
-	if (ct == &nf_conntrack_untracked)
+	if (nf_ct_is_untracked(ct))
 		return false;
 
 	if (ct->master)
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 39681f10291c..e536710ad916 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -123,11 +123,12 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
 
 	ct = nf_ct_get(skb, &ctinfo);
 
-	if (ct == &nf_conntrack_untracked)
-		statebit = XT_CONNTRACK_STATE_UNTRACKED;
-	else if (ct != NULL)
-		statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
-	else
+	if (ct) {
+		if (nf_ct_is_untracked(ct))
+			statebit = XT_CONNTRACK_STATE_UNTRACKED;
+		else
+			statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
+	} else
 		statebit = XT_CONNTRACK_STATE_INVALID;
 
 	if (info->match_flags & XT_CONNTRACK_STATE) {
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 3d54c236a1ba..1ca89908cbad 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -127,7 +127,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	 * reply packet of an established SNAT-ted connection. */
 
 	ct = nf_ct_get(skb, &ctinfo);
-	if (ct && (ct != &nf_conntrack_untracked) &&
+	if (ct && !nf_ct_is_untracked(ct) &&
 	    ((iph->protocol != IPPROTO_ICMP &&
 	      ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) ||
 	     (iph->protocol == IPPROTO_ICMP &&
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index e12e053d3782..a507922d80cd 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -26,14 +26,16 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct xt_state_info *sinfo = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
 	unsigned int statebit;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 
-	if (nf_ct_is_untracked(skb))
-		statebit = XT_STATE_UNTRACKED;
-	else if (!nf_ct_get(skb, &ctinfo))
+	if (!ct)
 		statebit = XT_STATE_INVALID;
-	else
-		statebit = XT_STATE_BIT(ctinfo);
-
+	else {
+		if (nf_ct_is_untracked(ct))
+			statebit = XT_STATE_UNTRACKED;
+		else
+			statebit = XT_STATE_BIT(ctinfo);
+	}
 	return (sinfo->statemask & statebit);
 }
 
-- 
cgit v1.2.3-55-g7522


From b3c5163fe0193a74016dba1bb22491e0d1e9aaa4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 9 Jun 2010 14:43:38 +0200
Subject: netfilter: nf_conntrack: per_cpu untracking

NOTRACK makes all cpus share a cache line on nf_conntrack_untracked
twice per packet, slowing down performance.

This patch converts it to a per_cpu variable.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack.h |  5 ++---
 net/netfilter/nf_conntrack_core.c    | 36 ++++++++++++++++++++++++++----------
 2 files changed, 28 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 3bc38c70bbbe..84a4b6fec99d 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -261,11 +261,10 @@ extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 			       u32 seq);
 
 /* Fake conntrack entry for untracked connections */
+DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 static inline struct nf_conn *nf_ct_untracked_get(void)
 {
-	extern struct nf_conn nf_conntrack_untracked;
-
-	return &nf_conntrack_untracked;
+	return &__raw_get_cpu_var(nf_conntrack_untracked);
 }
 extern void nf_ct_untracked_status_or(unsigned long bits);
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 6c1da212380d..9c661413b826 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -62,8 +62,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 unsigned int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
 
-struct nf_conn nf_conntrack_untracked;
-EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
+DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
+EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
@@ -1183,10 +1183,21 @@ static void nf_ct_release_dying_list(struct net *net)
 	spin_unlock_bh(&nf_conntrack_lock);
 }
 
+static int untrack_refs(void)
+{
+	int cnt = 0, cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
+
+		cnt += atomic_read(&ct->ct_general.use) - 1;
+	}
+	return cnt;
+}
+
 static void nf_conntrack_cleanup_init_net(void)
 {
-	/* wait until all references to nf_conntrack_untracked are dropped */
-	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
+	while (untrack_refs() > 0)
 		schedule();
 
 	nf_conntrack_helper_fini();
@@ -1323,14 +1334,17 @@ module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 
 void nf_ct_untracked_status_or(unsigned long bits)
 {
-	nf_conntrack_untracked.status |= bits;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(nf_conntrack_untracked, cpu).status |= bits;
 }
 EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
 
 static int nf_conntrack_init_init_net(void)
 {
 	int max_factor = 8;
-	int ret;
+	int ret, cpu;
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
 	 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
@@ -1369,10 +1383,12 @@ static int nf_conntrack_init_init_net(void)
 		goto err_extend;
 #endif
 	/* Set up fake conntrack: to never be deleted, not in any hashes */
-#ifdef CONFIG_NET_NS
-	nf_conntrack_untracked.ct_net = &init_net;
-#endif
-	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
+	for_each_possible_cpu(cpu) {
+		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
+
+		write_pnet(&ct->ct_net, &init_net);
+		atomic_set(&ct->ct_general.use, 1);
+	}
 	/*  - and look it like as a confirmed connection */
 	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
 	return 0;
-- 
cgit v1.2.3-55-g7522


From 88e7594a9775e54dcd421cb246406dce62e48bee Mon Sep 17 00:00:00 2001
From: Jiri Pirko
Date: Mon, 7 Jun 2010 03:27:39 +0000
Subject: phonet: use call_rcu for phonet device free

Use call_rcu rather than synchronize_rcu.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/pn_dev.h |  1 +
 net/phonet/pn_dev.c         | 15 +++++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index d7b989ca3d63..2d16783d5e20 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -34,6 +34,7 @@ struct phonet_device {
 	struct list_head list;
 	struct net_device *netdev;
 	DECLARE_BITMAP(addrs, 64);
+	struct rcu_head	rcu;
 };
 
 int phonet_device_init(void);
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index c33da6576942..b18e48fae975 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -162,6 +162,14 @@ int phonet_address_add(struct net_device *dev, u8 addr)
 	return err;
 }
 
+static void phonet_device_rcu_free(struct rcu_head *head)
+{
+	struct phonet_device *pnd;
+
+	pnd = container_of(head, struct phonet_device, rcu);
+	kfree(pnd);
+}
+
 int phonet_address_del(struct net_device *dev, u8 addr)
 {
 	struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
@@ -179,10 +187,9 @@ int phonet_address_del(struct net_device *dev, u8 addr)
 		pnd = NULL;
 	mutex_unlock(&pndevs->lock);
 
-	if (pnd) {
-		synchronize_rcu();
-		kfree(pnd);
-	}
+	if (pnd)
+		call_rcu(&pnd->rcu, phonet_device_rcu_free);
+
 	return err;
 }
 
-- 
cgit v1.2.3-55-g7522


From 592fcb9dfafaa02dd0edc207bf5d3a0ee7a1f8df Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 9 Jun 2010 16:21:07 +0000
Subject: ip: ip_ra_control() rcu fix

commit 66018506e15b (ip: Router Alert RCU conversion) introduced RCU
lookups to ip_call_ra_chain(). It missed proper deinit phase :
When ip_ra_control() deletes an ip_ra_chain, it should make sure
ip_call_ra_chain() users can not start to use socket during the rcu
grace period. It should also delay the sock_put() after the grace
period, or we risk a premature socket freeing and corruptions, as
raw sockets are not rcu protected yet.

This delay avoids using expensive atomic_inc_not_zero() in
ip_call_ra_chain().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       |  5 ++++-
 net/ipv4/ip_sockglue.c | 19 +++++++++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 9982c97f0bdc..d52f01180361 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -61,7 +61,10 @@ struct ipcm_cookie {
 struct ip_ra_chain {
 	struct ip_ra_chain	*next;
 	struct sock		*sk;
-	void			(*destructor)(struct sock *);
+	union {
+		void			(*destructor)(struct sock *);
+		struct sock		*saved_sk;
+	};
 	struct rcu_head		rcu;
 };
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 08b9519a24f4..47fff528ff39 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -241,9 +241,13 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
 struct ip_ra_chain *ip_ra_chain;
 static DEFINE_SPINLOCK(ip_ra_lock);
 
-static void ip_ra_free_rcu(struct rcu_head *head)
+
+static void ip_ra_destroy_rcu(struct rcu_head *head)
 {
-	kfree(container_of(head, struct ip_ra_chain, rcu));
+	struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
+
+	sock_put(ra->saved_sk);
+	kfree(ra);
 }
 
 int ip_ra_control(struct sock *sk, unsigned char on,
@@ -264,13 +268,20 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 				kfree(new_ra);
 				return -EADDRINUSE;
 			}
+			/* dont let ip_call_ra_chain() use sk again */
+			ra->sk = NULL;
 			rcu_assign_pointer(*rap, ra->next);
 			spin_unlock_bh(&ip_ra_lock);
 
 			if (ra->destructor)
 				ra->destructor(sk);
-			sock_put(sk);
-			call_rcu(&ra->rcu, ip_ra_free_rcu);
+			/*
+			 * Delay sock_put(sk) and kfree(ra) after one rcu grace
+			 * period. This guarantee ip_call_ra_chain() dont need
+			 * to mess with socket refcounts.
+			 */
+			ra->saved_sk = sk;
+			call_rcu(&ra->rcu, ip_ra_destroy_rcu);
 			return 0;
 		}
 	}
-- 
cgit v1.2.3-55-g7522


From d8d1f30b95a635dbd610dcc5eb641aca8f4768cf Mon Sep 17 00:00:00 2001
From: Changli Gao
Date: Thu, 10 Jun 2010 23:31:35 -0700
Subject: net-next: remove useless union keyword

remove useless union keyword in rtable, rt6_info and dn_route.

Since there is only one member in a union, the union keyword isn't useful.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/core/addr.c          |   2 +-
 drivers/infiniband/hw/cxgb3/iwch_cm.c   |   4 +-
 drivers/infiniband/hw/cxgb4/cm.c        |   4 +-
 drivers/infiniband/hw/nes/nes_cm.c      |   2 +-
 drivers/net/bonding/bond_main.c         |   6 +-
 drivers/net/cnic.c                      |   2 +-
 drivers/scsi/cxgb3i/cxgb3i_offload.c    |   4 +-
 include/net/dn_route.h                  |   4 +-
 include/net/ip6_fib.h                   |  10 +-
 include/net/ipip.h                      |   2 +-
 include/net/route.h                     |   6 +-
 net/atm/clip.c                          |   2 +-
 net/bridge/br_device.c                  |   2 +-
 net/bridge/br_netfilter.c               |  20 +-
 net/dccp/ipv4.c                         |   4 +-
 net/decnet/dn_route.c                   | 158 ++++++------
 net/ethernet/eth.c                      |   5 +-
 net/ipv4/af_inet.c                      |   4 +-
 net/ipv4/arp.c                          |  12 +-
 net/ipv4/datagram.c                     |   2 +-
 net/ipv4/icmp.c                         |  18 +-
 net/ipv4/igmp.c                         |  10 +-
 net/ipv4/inet_connection_sock.c         |   2 +-
 net/ipv4/ip_forward.c                   |  10 +-
 net/ipv4/ip_gre.c                       |  14 +-
 net/ipv4/ip_input.c                     |   4 +-
 net/ipv4/ip_output.c                    |  60 ++---
 net/ipv4/ipip.c                         |   8 +-
 net/ipv4/ipmr.c                         |   8 +-
 net/ipv4/netfilter.c                    |   8 +-
 net/ipv4/raw.c                          |  16 +-
 net/ipv4/route.c                        | 420 ++++++++++++++++----------------
 net/ipv4/syncookies.c                   |   6 +-
 net/ipv4/tcp_ipv4.c                     |   2 +-
 net/ipv4/udp.c                          |   4 +-
 net/ipv4/xfrm4_policy.c                 |   2 +-
 net/ipv6/addrconf.c                     |  10 +-
 net/ipv6/anycast.c                      |   6 +-
 net/ipv6/fib6_rules.c                   |  10 +-
 net/ipv6/ip6_fib.c                      |  30 +--
 net/ipv6/ip6_output.c                   |  38 +--
 net/ipv6/ip6_tunnel.c                   |   8 +-
 net/ipv6/mcast.c                        |   4 +-
 net/ipv6/ndisc.c                        |   8 +-
 net/ipv6/raw.c                          |  12 +-
 net/ipv6/route.c                        | 300 +++++++++++------------
 net/ipv6/sit.c                          |   8 +-
 net/l2tp/l2tp_ip.c                      |   6 +-
 net/netfilter/ipvs/ip_vs_xmit.c         |  86 +++----
 net/netfilter/nf_conntrack_h323_main.c  |  12 +-
 net/netfilter/nf_conntrack_netbios_ns.c |   2 +-
 net/netfilter/xt_TCPMSS.c               |   4 +-
 net/netfilter/xt_TEE.c                  |   4 +-
 net/rxrpc/ar-peer.c                     |   4 +-
 net/sctp/protocol.c                     |   4 +-
 55 files changed, 694 insertions(+), 709 deletions(-)

(limited to 'include/net')

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 0b926e45afe2..a5ea1bce9689 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -215,7 +215,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 
 	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
 	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
-		neigh_event_send(rt->u.dst.neighbour, NULL);
+		neigh_event_send(rt->dst.neighbour, NULL);
 		ret = -ENODATA;
 		if (neigh)
 			goto release;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index ebfb117ba68b..abd683ea326d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1364,7 +1364,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		       __func__);
 		goto reject;
 	}
-	dst = &rt->u.dst;
+	dst = &rt->dst;
 	l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev);
 	if (!l2t) {
 		printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
@@ -1932,7 +1932,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		err = -EHOSTUNREACH;
 		goto fail3;
 	}
-	ep->dst = &rt->u.dst;
+	ep->dst = &rt->dst;
 
 	/* get a l2t entry */
 	ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour,
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 30ce0a8eca09..8b693c8c25e2 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1364,7 +1364,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 		       __func__);
 		goto reject;
 	}
-	dst = &rt->u.dst;
+	dst = &rt->dst;
 	if (dst->neighbour->dev->flags & IFF_LOOPBACK) {
 		pdev = ip_dev_find(&init_net, peer_ip);
 		BUG_ON(!pdev);
@@ -1938,7 +1938,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		err = -EHOSTUNREACH;
 		goto fail3;
 	}
-	ep->dst = &rt->u.dst;
+	ep->dst = &rt->dst;
 
 	/* get a l2t entry */
 	if (ep->dst->neighbour->dev->flags & IFF_LOOPBACK) {
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 986d6f32dded..d876d0435cd4 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1146,7 +1146,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
 	}
 
 	if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID)))
-		neigh_event_send(rt->u.dst.neighbour, NULL);
+		neigh_event_send(rt->dst.neighbour, NULL);
 
 	ip_rt_put(rt);
 	return rc;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1b19276cff12..ac4f94b7da37 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2584,7 +2584,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		/*
 		 * This target is not on a VLAN
 		 */
-		if (rt->u.dst.dev == bond->dev) {
+		if (rt->dst.dev == bond->dev) {
 			ip_rt_put(rt);
 			pr_debug("basa: rtdev == bond->dev: arp_send\n");
 			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
@@ -2595,7 +2595,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		vlan_id = 0;
 		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
 			vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
-			if (vlan_dev == rt->u.dst.dev) {
+			if (vlan_dev == rt->dst.dev) {
 				vlan_id = vlan->vlan_id;
 				pr_debug("basa: vlan match on %s %d\n",
 				       vlan_dev->name, vlan_id);
@@ -2613,7 +2613,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		if (net_ratelimit()) {
 			pr_warning("%s: no path to arp_ip_target %pI4 via rt.dev %s\n",
 				   bond->dev->name, &fl.fl4_dst,
-				   rt->u.dst.dev ? rt->u.dst.dev->name : "NULL");
+				   rt->dst.dev ? rt->dst.dev->name : "NULL");
 		}
 		ip_rt_put(rt);
 	}
diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index fe925663d39a..908d89a4fe86 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -2824,7 +2824,7 @@ static int cnic_get_v4_route(struct sockaddr_in *dst_addr,
 
 	err = ip_route_output_key(&init_net, &rt, &fl);
 	if (!err)
-		*dst = &rt->u.dst;
+		*dst = &rt->dst;
 	return err;
 #else
 	return -ENETUNREACH;
diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c
index a175be9c496f..3b6a06eebf7f 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_offload.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c
@@ -1587,7 +1587,7 @@ cxgb3i_find_dev(struct net_device *dev, __be32 ipaddr)
 
 	err = ip_route_output_key(dev ? dev_net(dev) : &init_net, &rt, &fl);
 	if (!err)
-		return (&rt->u.dst)->dev;
+		return (&rt->dst)->dev;
 
 	return NULL;
 }
@@ -1649,7 +1649,7 @@ int cxgb3i_c3cn_connect(struct net_device *dev, struct s3_conn *c3cn,
 		c3cn->saddr.sin_addr.s_addr = rt->rt_src;
 
 	/* now commit destination to connection */
-	c3cn->dst_cache = &rt->u.dst;
+	c3cn->dst_cache = &rt->dst;
 
 	/* try to establish an offloaded connection */
 	dev = cxgb3_egress_dev(c3cn->dst_cache->dev, c3cn, 0);
diff --git a/include/net/dn_route.h b/include/net/dn_route.h
index 60c9f22d8694..ccadab3aa3f6 100644
--- a/include/net/dn_route.h
+++ b/include/net/dn_route.h
@@ -65,9 +65,7 @@ extern void dn_rt_cache_flush(int delay);
  * packets to the originating host.
  */
 struct dn_route {
-	union {
-		struct dst_entry dst;
-	} u;
+	struct dst_entry dst;
 
 	struct flowi fl;
 
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 4b1dc1161c37..062a823d311c 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -84,13 +84,11 @@ struct rt6key {
 struct fib6_table;
 
 struct rt6_info {
-	union {
-		struct dst_entry	dst;
-	} u;
+	struct dst_entry		dst;
 
-#define rt6i_dev			u.dst.dev
-#define rt6i_nexthop			u.dst.neighbour
-#define rt6i_expires			u.dst.expires
+#define rt6i_dev			dst.dev
+#define rt6i_nexthop			dst.neighbour
+#define rt6i_expires			dst.expires
 
 	/*
 	 * Tail elements of dst_entry (__refcnt etc.)
diff --git a/include/net/ipip.h b/include/net/ipip.h
index 11e8513d2d07..65caea8b414f 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -50,7 +50,7 @@ struct ip_tunnel_prl_entry {
 	int pkt_len = skb->len - skb_transport_offset(skb);		\
 									\
 	skb->ip_summed = CHECKSUM_NONE;					\
-	ip_select_ident(iph, &rt->u.dst, NULL);				\
+	ip_select_ident(iph, &rt->dst, NULL);				\
 									\
 	err = ip_local_out(skb);					\
 	if (likely(net_xmit_eval(err) == 0)) {				\
diff --git a/include/net/route.h b/include/net/route.h
index af6cf4b4c9dc..bd732d62e1c3 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -50,9 +50,7 @@
 struct fib_nh;
 struct inet_peer;
 struct rtable {
-	union {
-		struct dst_entry	dst;
-	} u;
+	struct dst_entry	dst;
 
 	/* Cache lookup keys */
 	struct flowi		fl;
@@ -144,7 +142,7 @@ extern void fib_add_ifaddr(struct in_ifaddr *);
 static inline void ip_rt_put(struct rtable * rt)
 {
 	if (rt)
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 }
 
 #define IPTOS_RT_MASK	(IPTOS_TOS_MASK & ~3)
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 313aba11316b..95fdd1185067 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -522,7 +522,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
 	error = ip_route_output_key(&init_net, &rt, &fl);
 	if (error)
 		return error;
-	neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1);
+	neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1);
 	ip_rt_put(rt);
 	if (!neigh)
 		return -ENOMEM;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index eedf2c94820e..b898364beaf5 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -127,7 +127,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
 
 #ifdef CONFIG_BRIDGE_NETFILTER
 	/* remember the MTU in the rtable for PMTU */
-	br->fake_rtable.u.dst.metrics[RTAX_MTU - 1] = new_mtu;
+	br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu;
 #endif
 
 	return 0;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 44420992f72f..0685b2558ab5 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -117,12 +117,12 @@ void br_netfilter_rtable_init(struct net_bridge *br)
 {
 	struct rtable *rt = &br->fake_rtable;
 
-	atomic_set(&rt->u.dst.__refcnt, 1);
-	rt->u.dst.dev = br->dev;
-	rt->u.dst.path = &rt->u.dst;
-	rt->u.dst.metrics[RTAX_MTU - 1] = 1500;
-	rt->u.dst.flags	= DST_NOXFRM;
-	rt->u.dst.ops = &fake_dst_ops;
+	atomic_set(&rt->dst.__refcnt, 1);
+	rt->dst.dev = br->dev;
+	rt->dst.path = &rt->dst;
+	rt->dst.metrics[RTAX_MTU - 1] = 1500;
+	rt->dst.flags	= DST_NOXFRM;
+	rt->dst.ops = &fake_dst_ops;
 }
 
 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
@@ -244,8 +244,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 		kfree_skb(skb);
 		return 0;
 	}
-	dst_hold(&rt->u.dst);
-	skb_dst_set(skb, &rt->u.dst);
+	dst_hold(&rt->dst);
+	skb_dst_set(skb, &rt->dst);
 
 	skb->dev = nf_bridge->physindev;
 	nf_bridge_update_protocol(skb);
@@ -396,8 +396,8 @@ bridged_dnat:
 			kfree_skb(skb);
 			return 0;
 		}
-		dst_hold(&rt->u.dst);
-		skb_dst_set(skb, &rt->u.dst);
+		dst_hold(&rt->dst);
+		skb_dst_set(skb, &rt->dst);
 	}
 
 	skb->dev = nf_bridge->physindev;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d9b11ef8694c..d4a166f0f391 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -105,7 +105,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		goto failure;
 
 	/* OK, now commit destination to socket.  */
-	sk_setup_caps(sk, &rt->u.dst);
+	sk_setup_caps(sk, &rt->dst);
 
 	dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr,
 						    inet->inet_daddr,
@@ -475,7 +475,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 		return NULL;
 	}
 
-	return &rt->u.dst;
+	return &rt->dst;
 }
 
 static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 812e6dff6067..6585ea6d1182 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -146,13 +146,13 @@ static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
 
 static inline void dnrt_free(struct dn_route *rt)
 {
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static inline void dnrt_drop(struct dn_route *rt)
 {
-	dst_release(&rt->u.dst);
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	dst_release(&rt->dst);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static void dn_dst_check_expire(unsigned long dummy)
@@ -167,13 +167,13 @@ static void dn_dst_check_expire(unsigned long dummy)
 
 		spin_lock(&dn_rt_hash_table[i].lock);
 		while((rt=*rtp) != NULL) {
-			if (atomic_read(&rt->u.dst.__refcnt) ||
-					(now - rt->u.dst.lastuse) < expire) {
-				rtp = &rt->u.dst.dn_next;
+			if (atomic_read(&rt->dst.__refcnt) ||
+					(now - rt->dst.lastuse) < expire) {
+				rtp = &rt->dst.dn_next;
 				continue;
 			}
-			*rtp = rt->u.dst.dn_next;
-			rt->u.dst.dn_next = NULL;
+			*rtp = rt->dst.dn_next;
+			rt->dst.dn_next = NULL;
 			dnrt_free(rt);
 		}
 		spin_unlock(&dn_rt_hash_table[i].lock);
@@ -198,13 +198,13 @@ static int dn_dst_gc(struct dst_ops *ops)
 		rtp = &dn_rt_hash_table[i].chain;
 
 		while((rt=*rtp) != NULL) {
-			if (atomic_read(&rt->u.dst.__refcnt) ||
-					(now - rt->u.dst.lastuse) < expire) {
-				rtp = &rt->u.dst.dn_next;
+			if (atomic_read(&rt->dst.__refcnt) ||
+					(now - rt->dst.lastuse) < expire) {
+				rtp = &rt->dst.dn_next;
 				continue;
 			}
-			*rtp = rt->u.dst.dn_next;
-			rt->u.dst.dn_next = NULL;
+			*rtp = rt->dst.dn_next;
+			rt->dst.dn_next = NULL;
 			dnrt_drop(rt);
 			break;
 		}
@@ -287,25 +287,25 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route *
 	while((rth = *rthp) != NULL) {
 		if (compare_keys(&rth->fl, &rt->fl)) {
 			/* Put it first */
-			*rthp = rth->u.dst.dn_next;
-			rcu_assign_pointer(rth->u.dst.dn_next,
+			*rthp = rth->dst.dn_next;
+			rcu_assign_pointer(rth->dst.dn_next,
 					   dn_rt_hash_table[hash].chain);
 			rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
 
-			dst_use(&rth->u.dst, now);
+			dst_use(&rth->dst, now);
 			spin_unlock_bh(&dn_rt_hash_table[hash].lock);
 
 			dnrt_drop(rt);
 			*rp = rth;
 			return 0;
 		}
-		rthp = &rth->u.dst.dn_next;
+		rthp = &rth->dst.dn_next;
 	}
 
-	rcu_assign_pointer(rt->u.dst.dn_next, dn_rt_hash_table[hash].chain);
+	rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
 	rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
 
-	dst_use(&rt->u.dst, now);
+	dst_use(&rt->dst, now);
 	spin_unlock_bh(&dn_rt_hash_table[hash].lock);
 	*rp = rt;
 	return 0;
@@ -323,8 +323,8 @@ static void dn_run_flush(unsigned long dummy)
 			goto nothing_to_declare;
 
 		for(; rt; rt=next) {
-			next = rt->u.dst.dn_next;
-			rt->u.dst.dn_next = NULL;
+			next = rt->dst.dn_next;
+			rt->dst.dn_next = NULL;
 			dst_free((struct dst_entry *)rt);
 		}
 
@@ -743,7 +743,7 @@ static int dn_forward(struct sk_buff *skb)
 	/* Ensure that we have enough space for headers */
 	rt = (struct dn_route *)skb_dst(skb);
 	header_len = dn_db->use_long ? 21 : 6;
-	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len))
+	if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len))
 		goto drop;
 
 	/*
@@ -752,7 +752,7 @@ static int dn_forward(struct sk_buff *skb)
 	if (++cb->hops > 30)
 		goto drop;
 
-	skb->dev = rt->u.dst.dev;
+	skb->dev = rt->dst.dev;
 
 	/*
 	 * If packet goes out same interface it came in on, then set
@@ -792,7 +792,7 @@ static int dn_rt_bug(struct sk_buff *skb)
 static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 {
 	struct dn_fib_info *fi = res->fi;
-	struct net_device *dev = rt->u.dst.dev;
+	struct net_device *dev = rt->dst.dev;
 	struct neighbour *n;
 	unsigned mss;
 
@@ -800,25 +800,25 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 		if (DN_FIB_RES_GW(*res) &&
 		    DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = DN_FIB_RES_GW(*res);
-		memcpy(rt->u.dst.metrics, fi->fib_metrics,
-		       sizeof(rt->u.dst.metrics));
+		memcpy(rt->dst.metrics, fi->fib_metrics,
+		       sizeof(rt->dst.metrics));
 	}
 	rt->rt_type = res->type;
 
-	if (dev != NULL && rt->u.dst.neighbour == NULL) {
+	if (dev != NULL && rt->dst.neighbour == NULL) {
 		n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
 		if (IS_ERR(n))
 			return PTR_ERR(n);
-		rt->u.dst.neighbour = n;
+		rt->dst.neighbour = n;
 	}
 
-	if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 ||
-	    dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu)
-		rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
-	mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst));
-	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 ||
-	    dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss)
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = mss;
+	if (dst_metric(&rt->dst, RTAX_MTU) == 0 ||
+	    dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
+		rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
+	mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
+	if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 ||
+	    dst_metric(&rt->dst, RTAX_ADVMSS) > mss)
+		rt->dst.metrics[RTAX_ADVMSS-1] = mss;
 	return 0;
 }
 
@@ -1096,8 +1096,8 @@ make_route:
 	if (rt == NULL)
 		goto e_nobufs;
 
-	atomic_set(&rt->u.dst.__refcnt, 1);
-	rt->u.dst.flags   = DST_HOST;
+	atomic_set(&rt->dst.__refcnt, 1);
+	rt->dst.flags   = DST_HOST;
 
 	rt->fl.fld_src    = oldflp->fld_src;
 	rt->fl.fld_dst    = oldflp->fld_dst;
@@ -1113,17 +1113,17 @@ make_route:
 	rt->rt_dst_map    = fl.fld_dst;
 	rt->rt_src_map    = fl.fld_src;
 
-	rt->u.dst.dev = dev_out;
+	rt->dst.dev = dev_out;
 	dev_hold(dev_out);
-	rt->u.dst.neighbour = neigh;
+	rt->dst.neighbour = neigh;
 	neigh = NULL;
 
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.output  = dn_output;
-	rt->u.dst.input   = dn_rt_bug;
+	rt->dst.lastuse = jiffies;
+	rt->dst.output  = dn_output;
+	rt->dst.input   = dn_rt_bug;
 	rt->rt_flags      = flags;
 	if (flags & RTCF_LOCAL)
-		rt->u.dst.input = dn_nsp_rx;
+		rt->dst.input = dn_nsp_rx;
 
 	err = dn_rt_set_next_hop(rt, &res);
 	if (err)
@@ -1152,7 +1152,7 @@ e_nobufs:
 	err = -ENOBUFS;
 	goto done;
 e_neighbour:
-	dst_free(&rt->u.dst);
+	dst_free(&rt->dst);
 	goto e_nobufs;
 }
 
@@ -1168,15 +1168,15 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
 	if (!(flags & MSG_TRYHARD)) {
 		rcu_read_lock_bh();
 		for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
-			rt = rcu_dereference_bh(rt->u.dst.dn_next)) {
+			rt = rcu_dereference_bh(rt->dst.dn_next)) {
 			if ((flp->fld_dst == rt->fl.fld_dst) &&
 			    (flp->fld_src == rt->fl.fld_src) &&
 			    (flp->mark == rt->fl.mark) &&
 			    (rt->fl.iif == 0) &&
 			    (rt->fl.oif == flp->oif)) {
-				dst_use(&rt->u.dst, jiffies);
+				dst_use(&rt->dst, jiffies);
 				rcu_read_unlock_bh();
-				*pprt = &rt->u.dst;
+				*pprt = &rt->dst;
 				return 0;
 			}
 		}
@@ -1375,29 +1375,29 @@ make_route:
 	rt->fl.iif        = in_dev->ifindex;
 	rt->fl.mark       = fl.mark;
 
-	rt->u.dst.flags = DST_HOST;
-	rt->u.dst.neighbour = neigh;
-	rt->u.dst.dev = out_dev;
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.output = dn_rt_bug;
+	rt->dst.flags = DST_HOST;
+	rt->dst.neighbour = neigh;
+	rt->dst.dev = out_dev;
+	rt->dst.lastuse = jiffies;
+	rt->dst.output = dn_rt_bug;
 	switch(res.type) {
 		case RTN_UNICAST:
-			rt->u.dst.input = dn_forward;
+			rt->dst.input = dn_forward;
 			break;
 		case RTN_LOCAL:
-			rt->u.dst.output = dn_output;
-			rt->u.dst.input = dn_nsp_rx;
-			rt->u.dst.dev = in_dev;
+			rt->dst.output = dn_output;
+			rt->dst.input = dn_nsp_rx;
+			rt->dst.dev = in_dev;
 			flags |= RTCF_LOCAL;
 			break;
 		default:
 		case RTN_UNREACHABLE:
 		case RTN_BLACKHOLE:
-			rt->u.dst.input = dst_discard;
+			rt->dst.input = dst_discard;
 	}
 	rt->rt_flags = flags;
-	if (rt->u.dst.dev)
-		dev_hold(rt->u.dst.dev);
+	if (rt->dst.dev)
+		dev_hold(rt->dst.dev);
 
 	err = dn_rt_set_next_hop(rt, &res);
 	if (err)
@@ -1405,7 +1405,7 @@ make_route:
 
 	hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst);
 	dn_insert_route(rt, hash, &rt);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 done:
 	if (neigh)
@@ -1427,7 +1427,7 @@ e_nobufs:
 	goto done;
 
 e_neighbour:
-	dst_free(&rt->u.dst);
+	dst_free(&rt->dst);
 	goto done;
 }
 
@@ -1442,13 +1442,13 @@ static int dn_route_input(struct sk_buff *skb)
 
 	rcu_read_lock();
 	for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
-	    rt = rcu_dereference(rt->u.dst.dn_next)) {
+	    rt = rcu_dereference(rt->dst.dn_next)) {
 		if ((rt->fl.fld_src == cb->src) &&
 		    (rt->fl.fld_dst == cb->dst) &&
 		    (rt->fl.oif == 0) &&
 		    (rt->fl.mark == skb->mark) &&
 		    (rt->fl.iif == cb->iif)) {
-			dst_use(&rt->u.dst, jiffies);
+			dst_use(&rt->dst, jiffies);
 			rcu_read_unlock();
 			skb_dst_set(skb, (struct dst_entry *)rt);
 			return 0;
@@ -1487,8 +1487,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 		r->rtm_src_len = 16;
 		RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src);
 	}
-	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+	if (rt->dst.dev)
+		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex);
 	/*
 	 * Note to self - change this if input routes reverse direction when
 	 * they deal only with inputs and not with replies like they do
@@ -1497,11 +1497,11 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src);
 	if (rt->rt_daddr != rt->rt_gateway)
 		RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway);
-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto rtattr_failure;
-	expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
-	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires,
-			       rt->u.dst.error) < 0)
+	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
+			       rt->dst.error) < 0)
 		goto rtattr_failure;
 	if (rt->fl.iif)
 		RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
@@ -1568,8 +1568,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 		local_bh_enable();
 		memset(cb, 0, sizeof(struct dn_skb_cb));
 		rt = (struct dn_route *)skb_dst(skb);
-		if (!err && -rt->u.dst.error)
-			err = rt->u.dst.error;
+		if (!err && -rt->dst.error)
+			err = rt->dst.error;
 	} else {
 		int oif = 0;
 		if (rta[RTA_OIF - 1])
@@ -1583,7 +1583,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 	skb->dev = NULL;
 	if (err)
 		goto out_free;
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
@@ -1632,10 +1632,10 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		rcu_read_lock_bh();
 		for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
 			rt;
-			rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) {
+			rt = rcu_dereference_bh(rt->dst.dn_next), idx++) {
 			if (idx < s_idx)
 				continue;
-			skb_dst_set(skb, dst_clone(&rt->u.dst));
+			skb_dst_set(skb, dst_clone(&rt->dst));
 			if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
 					cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					1, NLM_F_MULTI) <= 0) {
@@ -1678,7 +1678,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
 {
 	struct dn_rt_cache_iter_state *s = seq->private;
 
-	rt = rt->u.dst.dn_next;
+	rt = rt->dst.dn_next;
 	while(!rt) {
 		rcu_read_unlock_bh();
 		if (--s->bucket < 0)
@@ -1719,12 +1719,12 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
 	char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
 
 	seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
-			rt->u.dst.dev ? rt->u.dst.dev->name : "*",
+			rt->dst.dev ? rt->dst.dev->name : "*",
 			dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
 			dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
-			atomic_read(&rt->u.dst.__refcnt),
-			rt->u.dst.__use,
-			(int) dst_metric(&rt->u.dst, RTAX_RTT));
+			atomic_read(&rt->dst.__refcnt),
+			rt->dst.__use,
+			(int) dst_metric(&rt->dst, RTAX_RTT));
 	return 0;
 }
 
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 61ec0329316c..215c83986a9d 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -158,7 +158,6 @@ EXPORT_SYMBOL(eth_rebuild_header);
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ethhdr *eth;
-	unsigned char *rawp;
 
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
@@ -199,15 +198,13 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	if (ntohs(eth->h_proto) >= 1536)
 		return eth->h_proto;
 
-	rawp = skb->data;
-
 	/*
 	 *      This is a magic hack to spot IPX packets. Older Novell breaks
 	 *      the protocol design and runs IPX over 802.3 without an 802.2 LLC
 	 *      layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
 	 *      won't work for fault tolerant netware but does for the rest.
 	 */
-	if (*(unsigned short *)rawp == 0xFFFF)
+	if (skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF)
 		return htons(ETH_P_802_3);
 
 	/*
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 551ce564b035..d99e7e020189 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1100,7 +1100,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 	if (err)
 		return err;
 
-	sk_setup_caps(sk, &rt->u.dst);
+	sk_setup_caps(sk, &rt->dst);
 
 	new_saddr = rt->rt_src;
 
@@ -1166,7 +1166,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 	err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
 }
 	if (!err)
-		sk_setup_caps(sk, &rt->u.dst);
+		sk_setup_caps(sk, &rt->dst);
 	else {
 		/* Routing failed... */
 		sk->sk_route_caps = 0;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 917d2d66162e..cf78f41830ca 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -427,7 +427,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 
 	if (ip_route_output_key(net, &rt, &fl) < 0)
 		return 1;
-	if (rt->u.dst.dev != dev) {
+	if (rt->dst.dev != dev) {
 		NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
 		flag = 1;
 	}
@@ -532,7 +532,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
 	struct in_device *out_dev;
 	int imi, omi = -1;
 
-	if (rt->u.dst.dev == dev)
+	if (rt->dst.dev == dev)
 		return 0;
 
 	if (!IN_DEV_PROXY_ARP(in_dev))
@@ -545,7 +545,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
 
 	/* place to check for proxy_arp for routes */
 
-	out_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	out_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (out_dev)
 		omi = IN_DEV_MEDIUM_ID(out_dev);
 
@@ -576,7 +576,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev,
 				__be32 sip, __be32 tip)
 {
 	/* Private VLAN is only concerned about the same ethernet segment */
-	if (rt->u.dst.dev != dev)
+	if (rt->dst.dev != dev)
 		return 0;
 
 	/* Don't reply on self probes (often done by windowz boxes)*/
@@ -1042,7 +1042,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 		struct rtable * rt;
 		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
 			return -EINVAL;
@@ -1149,7 +1149,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 		struct rtable * rt;
 		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
 			return -EINVAL;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index fb2465811b48..fe3daa7f07a9 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -69,7 +69,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk->sk_state = TCP_ESTABLISHED;
 	inet->inet_id = jiffies;
 
-	sk_dst_set(sk, &rt->u.dst);
+	sk_dst_set(sk, &rt->dst);
 	return(0);
 }
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index bdb6c71e72a6..7569b21a3a2d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -271,7 +271,7 @@ int xrlim_allow(struct dst_entry *dst, int timeout)
 static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 		int type, int code)
 {
-	struct dst_entry *dst = &rt->u.dst;
+	struct dst_entry *dst = &rt->dst;
 	int rc = 1;
 
 	if (type > NR_ICMP_TYPES)
@@ -327,7 +327,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 	struct sock *sk;
 	struct sk_buff *skb;
 
-	sk = icmp_sk(dev_net((*rt)->u.dst.dev));
+	sk = icmp_sk(dev_net((*rt)->dst.dev));
 	if (ip_append_data(sk, icmp_glue_bits, icmp_param,
 			   icmp_param->data_len+icmp_param->head_len,
 			   icmp_param->head_len,
@@ -359,7 +359,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 {
 	struct ipcm_cookie ipc;
 	struct rtable *rt = skb_rtable(skb);
-	struct net *net = dev_net(rt->u.dst.dev);
+	struct net *net = dev_net(rt->dst.dev);
 	struct sock *sk;
 	struct inet_sock *inet;
 	__be32 daddr;
@@ -427,7 +427,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 
 	if (!rt)
 		goto out;
-	net = dev_net(rt->u.dst.dev);
+	net = dev_net(rt->dst.dev);
 
 	/*
 	 *	Find the original header. It is expected to be valid, of course.
@@ -596,9 +596,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 			/* Ugh! */
 			orefdst = skb_in->_skb_refdst; /* save old refdst */
 			err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
-					     RT_TOS(tos), rt2->u.dst.dev);
+					     RT_TOS(tos), rt2->dst.dev);
 
-			dst_release(&rt2->u.dst);
+			dst_release(&rt2->dst);
 			rt2 = skb_rtable(skb_in);
 			skb_in->_skb_refdst = orefdst; /* restore old refdst */
 		}
@@ -610,7 +610,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 				  XFRM_LOOKUP_ICMP);
 		switch (err) {
 		case 0:
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			rt = rt2;
 			break;
 		case -EPERM:
@@ -629,7 +629,7 @@ route_done:
 
 	/* RFC says return as much as we can without exceeding 576 bytes. */
 
-	room = dst_mtu(&rt->u.dst);
+	room = dst_mtu(&rt->dst);
 	if (room > 576)
 		room = 576;
 	room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
@@ -972,7 +972,7 @@ int icmp_rcv(struct sk_buff *skb)
 {
 	struct icmphdr *icmph;
 	struct rtable *rt = skb_rtable(skb);
-	struct net *net = dev_net(rt->u.dst.dev);
+	struct net *net = dev_net(rt->dst.dev);
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 		struct sec_path *sp = skb_sec_path(skb);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 3294f547c481..b5580d422994 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -312,7 +312,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 		return NULL;
 	}
 
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	skb->dev = dev;
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
@@ -330,7 +330,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	pip->saddr    = rt->rt_src;
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len  = 0;	/* filled in later */
-	ip_select_ident(pip, &rt->u.dst, NULL);
+	ip_select_ident(pip, &rt->dst, NULL);
 	((u8*)&pip[1])[0] = IPOPT_RA;
 	((u8*)&pip[1])[1] = 4;
 	((u8*)&pip[1])[2] = 0;
@@ -660,7 +660,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 		return -1;
 	}
 
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
@@ -676,7 +676,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	iph->daddr    = dst;
 	iph->saddr    = rt->rt_src;
 	iph->protocol = IPPROTO_IGMP;
-	ip_select_ident(iph, &rt->u.dst, NULL);
+	ip_select_ident(iph, &rt->dst, NULL);
 	((u8*)&iph[1])[0] = IPOPT_RA;
 	((u8*)&iph[1])[1] = 4;
 	((u8*)&iph[1])[2] = 0;
@@ -1425,7 +1425,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
 	}
 
 	if (!dev && !ip_route_output_key(net, &rt, &fl)) {
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 	}
 	if (dev) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 70eb3507c406..57c9e4d7b805 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -383,7 +383,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 		goto no_route;
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto route_err;
-	return &rt->u.dst;
+	return &rt->dst;
 
 route_err:
 	ip_rt_put(rt);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 56cdf68a074c..99461f09320f 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -87,16 +87,16 @@ int ip_forward(struct sk_buff *skb)
 	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto sr_failed;
 
-	if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) &&
+	if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
 		     (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
-		IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-			  htonl(dst_mtu(&rt->u.dst)));
+			  htonl(dst_mtu(&rt->dst)));
 		goto drop;
 	}
 
 	/* We are about to mangle packet. Copy it! */
-	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
+	if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len))
 		goto drop;
 	iph = ip_hdr(skb);
 
@@ -113,7 +113,7 @@ int ip_forward(struct sk_buff *skb)
 	skb->priority = rt_tos2priority(iph->tos);
 
 	return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev,
-		       rt->u.dst.dev, ip_forward_finish);
+		       rt->dst.dev, ip_forward_finish);
 
 sr_failed:
 	/*
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 32618e11076d..749e54889e82 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -745,7 +745,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			goto tx_error;
 		}
 	}
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
@@ -755,7 +755,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 
 	df = tiph->frag_off;
 	if (df)
-		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
+		mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
 	else
 		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
@@ -803,7 +803,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			tunnel->err_count = 0;
 	}
 
-	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;
+	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
 
 	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -830,7 +830,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -853,7 +853,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
 #endif
 		else
-			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
+			iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
 	}
 
 	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
@@ -915,7 +915,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
 				    .proto = IPPROTO_GRE };
 		struct rtable *rt;
 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tdev = rt->u.dst.dev;
+			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
 
@@ -1174,7 +1174,7 @@ static int ipgre_open(struct net_device *dev)
 		struct rtable *rt;
 		if (ip_route_output_key(dev_net(dev), &rt, &fl))
 			return -EADDRNOTAVAIL;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (__in_dev_get_rtnl(dev) == NULL)
 			return -EADDRNOTAVAIL;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 08a3b121f908..db47a5a00ed2 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -356,10 +356,10 @@ static int ip_rcv_finish(struct sk_buff *skb)
 
 	rt = skb_rtable(skb);
 	if (rt->rt_type == RTN_MULTICAST) {
-		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST,
+		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST,
 				skb->len);
 	} else if (rt->rt_type == RTN_BROADCAST)
-		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST,
+		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST,
 				skb->len);
 
 	return dst_input(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9a4a6c96cb0d..6cbeb2e108de 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -151,15 +151,15 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	iph->version  = 4;
 	iph->ihl      = 5;
 	iph->tos      = inet->tos;
-	if (ip_dont_fragment(sk, &rt->u.dst))
+	if (ip_dont_fragment(sk, &rt->dst))
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
-	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
+	iph->ttl      = ip_select_ttl(inet, &rt->dst);
 	iph->daddr    = rt->rt_dst;
 	iph->saddr    = rt->rt_src;
 	iph->protocol = sk->sk_protocol;
-	ip_select_ident(iph, &rt->u.dst, sk);
+	ip_select_ident(iph, &rt->dst, sk);
 
 	if (opt && opt->optlen) {
 		iph->ihl += opt->optlen>>2;
@@ -240,7 +240,7 @@ int ip_mc_output(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct rtable *rt = skb_rtable(skb);
-	struct net_device *dev = rt->u.dst.dev;
+	struct net_device *dev = rt->dst.dev;
 
 	/*
 	 *	If the indicated interface is up and running, send the packet.
@@ -359,9 +359,9 @@ int ip_queue_xmit(struct sk_buff *skb)
 			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
 				goto no_route;
 		}
-		sk_setup_caps(sk, &rt->u.dst);
+		sk_setup_caps(sk, &rt->dst);
 	}
-	skb_dst_set_noref(skb, &rt->u.dst);
+	skb_dst_set_noref(skb, &rt->dst);
 
 packet_routed:
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
@@ -372,11 +372,11 @@ packet_routed:
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
 	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
-	if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df)
+	if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
-	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
+	iph->ttl      = ip_select_ttl(inet, &rt->dst);
 	iph->protocol = sk->sk_protocol;
 	iph->saddr    = rt->rt_src;
 	iph->daddr    = rt->rt_dst;
@@ -387,7 +387,7 @@ packet_routed:
 		ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
 	}
 
-	ip_select_ident_more(iph, &rt->u.dst, sk,
+	ip_select_ident_more(iph, &rt->dst, sk,
 			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
 
 	skb->priority = sk->sk_priority;
@@ -452,7 +452,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	struct rtable *rt = skb_rtable(skb);
 	int err = 0;
 
-	dev = rt->u.dst.dev;
+	dev = rt->dst.dev;
 
 	/*
 	 *	Point into the IP datagram header.
@@ -473,7 +473,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	 */
 
 	hlen = iph->ihl * 4;
-	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
+	mtu = dst_mtu(&rt->dst) - hlen;	/* Size of data space */
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge)
 		mtu -= nf_bridge_mtu_reduction(skb);
@@ -586,7 +586,7 @@ slow_path:
 	 * we need to make room for the encapsulating header
 	 */
 	pad = nf_bridge_pad(skb);
-	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
+	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, pad);
 	mtu -= pad;
 
 	/*
@@ -833,13 +833,13 @@ int ip_append_data(struct sock *sk,
 		 */
 		*rtp = NULL;
 		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
-					    rt->u.dst.dev->mtu :
-					    dst_mtu(rt->u.dst.path);
-		inet->cork.dst = &rt->u.dst;
+					    rt->dst.dev->mtu :
+					    dst_mtu(rt->dst.path);
+		inet->cork.dst = &rt->dst;
 		inet->cork.length = 0;
 		sk->sk_sndmsg_page = NULL;
 		sk->sk_sndmsg_off = 0;
-		if ((exthdrlen = rt->u.dst.header_len) != 0) {
+		if ((exthdrlen = rt->dst.header_len) != 0) {
 			length += exthdrlen;
 			transhdrlen += exthdrlen;
 		}
@@ -852,7 +852,7 @@ int ip_append_data(struct sock *sk,
 		exthdrlen = 0;
 		mtu = inet->cork.fragsize;
 	}
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
@@ -869,14 +869,14 @@ int ip_append_data(struct sock *sk,
 	 */
 	if (transhdrlen &&
 	    length + fragheaderlen <= mtu &&
-	    rt->u.dst.dev->features & NETIF_F_V4_CSUM &&
+	    rt->dst.dev->features & NETIF_F_V4_CSUM &&
 	    !exthdrlen)
 		csummode = CHECKSUM_PARTIAL;
 
 	inet->cork.length += length;
 	if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+	    (rt->dst.dev->features & NETIF_F_UFO)) {
 		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
 					 fragheaderlen, transhdrlen, mtu,
 					 flags);
@@ -924,7 +924,7 @@ alloc_new_skb:
 			fraglen = datalen + fragheaderlen;
 
 			if ((flags & MSG_MORE) &&
-			    !(rt->u.dst.dev->features&NETIF_F_SG))
+			    !(rt->dst.dev->features&NETIF_F_SG))
 				alloclen = mtu;
 			else
 				alloclen = datalen + fragheaderlen;
@@ -935,7 +935,7 @@ alloc_new_skb:
 			 * the last.
 			 */
 			if (datalen == length + fraggap)
-				alloclen += rt->u.dst.trailer_len;
+				alloclen += rt->dst.trailer_len;
 
 			if (transhdrlen) {
 				skb = sock_alloc_send_skb(sk,
@@ -1008,7 +1008,7 @@ alloc_new_skb:
 		if (copy > length)
 			copy = length;
 
-		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+		if (!(rt->dst.dev->features&NETIF_F_SG)) {
 			unsigned int off;
 
 			off = skb->len;
@@ -1103,10 +1103,10 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 	if (inet->cork.flags & IPCORK_OPT)
 		opt = inet->cork.opt;
 
-	if (!(rt->u.dst.dev->features&NETIF_F_SG))
+	if (!(rt->dst.dev->features&NETIF_F_SG))
 		return -EOPNOTSUPP;
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 	mtu = inet->cork.fragsize;
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
@@ -1122,7 +1122,7 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 
 	inet->cork.length += size;
 	if ((sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+	    (rt->dst.dev->features & NETIF_F_UFO)) {
 		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
 		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 	}
@@ -1274,8 +1274,8 @@ int ip_push_pending_frames(struct sock *sk)
 	 * If local_df is set too, we still allow to fragment this frame
 	 * locally. */
 	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
-	    (skb->len <= dst_mtu(&rt->u.dst) &&
-	     ip_dont_fragment(sk, &rt->u.dst)))
+	    (skb->len <= dst_mtu(&rt->dst) &&
+	     ip_dont_fragment(sk, &rt->dst)))
 		df = htons(IP_DF);
 
 	if (inet->cork.flags & IPCORK_OPT)
@@ -1284,7 +1284,7 @@ int ip_push_pending_frames(struct sock *sk)
 	if (rt->rt_type == RTN_MULTICAST)
 		ttl = inet->mc_ttl;
 	else
-		ttl = ip_select_ttl(inet, &rt->u.dst);
+		ttl = ip_select_ttl(inet, &rt->dst);
 
 	iph = (struct iphdr *)skb->data;
 	iph->version = 4;
@@ -1295,7 +1295,7 @@ int ip_push_pending_frames(struct sock *sk)
 	}
 	iph->tos = inet->tos;
 	iph->frag_off = df;
-	ip_select_ident(iph, &rt->u.dst, sk);
+	ip_select_ident(iph, &rt->dst, sk);
 	iph->ttl = ttl;
 	iph->protocol = sk->sk_protocol;
 	iph->saddr = rt->rt_src;
@@ -1308,7 +1308,7 @@ int ip_push_pending_frames(struct sock *sk)
 	 * on dst refcount
 	 */
 	inet->cork.dst = NULL;
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	if (iph->protocol == IPPROTO_ICMP)
 		icmp_out_count(net, ((struct icmphdr *)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 7fd636711037..ec036731a70b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -435,7 +435,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto tx_error_icmp;
 		}
 	}
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
@@ -446,7 +446,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	df |= old_iph->frag_off & htons(IP_DF);
 
 	if (df) {
-		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 
 		if (mtu < 68) {
 			stats->collisions++;
@@ -503,7 +503,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -552,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
 				    .proto = IPPROTO_IPIP };
 		struct rtable *rt;
 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tdev = rt->u.dst.dev;
+			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
 		dev->flags |= IFF_POINTOPOINT;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 856123fe32f9..8418afc357ee 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1551,9 +1551,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 			goto out_free;
 	}
 
-	dev = rt->u.dst.dev;
+	dev = rt->dst.dev;
 
-	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
+	if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
 		/* Do not fragment multicasts. Alas, IPv4 does not
 		   allow to send ICMP, so that packets will disappear
 		   to blackhole.
@@ -1564,7 +1564,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 		goto out_free;
 	}
 
-	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
+	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
 
 	if (skb_cow(skb, encap)) {
 		ip_rt_put(rt);
@@ -1575,7 +1575,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 	vif->bytes_out += skb->len;
 
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	ip_decrease_ttl(ip_hdr(skb));
 
 	/* FIXME: forward and output firewalls used to be called here.
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 07de855e2175..cfbc79af21c3 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -43,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 
 		/* Drop old route. */
 		skb_dst_drop(skb);
-		skb_dst_set(skb, &rt->u.dst);
+		skb_dst_set(skb, &rt->dst);
 	} else {
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
@@ -53,11 +53,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 
 		orefdst = skb->_skb_refdst;
 		if (ip_route_input(skb, iph->daddr, iph->saddr,
-				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
-			dst_release(&rt->u.dst);
+				   RT_TOS(iph->tos), rt->dst.dev) != 0) {
+			dst_release(&rt->dst);
 			return -1;
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		refdst_drop(orefdst);
 	}
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 66cc3befcd44..009a7b2aa1ef 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -325,24 +325,24 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 	int err;
 	struct rtable *rt = *rtp;
 
-	if (length > rt->u.dst.dev->mtu) {
+	if (length > rt->dst.dev->mtu) {
 		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
-			       rt->u.dst.dev->mtu);
+			       rt->dst.dev->mtu);
 		return -EMSGSIZE;
 	}
 	if (flags&MSG_PROBE)
 		goto out;
 
 	skb = sock_alloc_send_skb(sk,
-				  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+				  length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
 				  flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto error;
-	skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
+	skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	*rtp = NULL;
 
 	skb_reset_network_header(skb);
@@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 		iph->check   = 0;
 		iph->tot_len = htons(length);
 		if (!iph->id)
-			ip_select_ident(iph, &rt->u.dst, NULL);
+			ip_select_ident(iph, &rt->dst, NULL);
 
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 	}
@@ -384,7 +384,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 			skb_transport_header(skb))->type);
 
 	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
-		      rt->u.dst.dev, dst_output);
+		      rt->dst.dev, dst_output);
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
@@ -606,7 +606,7 @@ out:
 	return len;
 
 do_confirm:
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);
 	if (!(msg->msg_flags & MSG_PROBE) || len)
 		goto back_from_confirm;
 	err = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 883b5c7195ac..a291edbbc97f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -286,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
 		rcu_read_lock_bh();
 		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
 		while (r) {
-			if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
+			if (dev_net(r->dst.dev) == seq_file_net(seq) &&
 			    r->rt_genid == st->genid)
 				return r;
-			r = rcu_dereference_bh(r->u.dst.rt_next);
+			r = rcu_dereference_bh(r->dst.rt_next);
 		}
 		rcu_read_unlock_bh();
 	}
@@ -301,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
 {
 	struct rt_cache_iter_state *st = seq->private;
 
-	r = r->u.dst.rt_next;
+	r = r->dst.rt_next;
 	while (!r) {
 		rcu_read_unlock_bh();
 		do {
@@ -319,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq,
 {
 	struct rt_cache_iter_state *st = seq->private;
 	while ((r = __rt_cache_get_next(seq, r)) != NULL) {
-		if (dev_net(r->u.dst.dev) != seq_file_net(seq))
+		if (dev_net(r->dst.dev) != seq_file_net(seq))
 			continue;
 		if (r->rt_genid == st->genid)
 			break;
@@ -377,19 +377,19 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
 			      "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
-			r->u.dst.dev ? r->u.dst.dev->name : "*",
+			r->dst.dev ? r->dst.dev->name : "*",
 			(__force u32)r->rt_dst,
 			(__force u32)r->rt_gateway,
-			r->rt_flags, atomic_read(&r->u.dst.__refcnt),
-			r->u.dst.__use, 0, (__force u32)r->rt_src,
-			(dst_metric(&r->u.dst, RTAX_ADVMSS) ?
-			     (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
-			dst_metric(&r->u.dst, RTAX_WINDOW),
-			(int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) +
-			      dst_metric(&r->u.dst, RTAX_RTTVAR)),
+			r->rt_flags, atomic_read(&r->dst.__refcnt),
+			r->dst.__use, 0, (__force u32)r->rt_src,
+			(dst_metric(&r->dst, RTAX_ADVMSS) ?
+			     (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
+			dst_metric(&r->dst, RTAX_WINDOW),
+			(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
+			      dst_metric(&r->dst, RTAX_RTTVAR)),
 			r->fl.fl4_tos,
-			r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1,
-			r->u.dst.hh ? (r->u.dst.hh->hh_output ==
+			r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
+			r->dst.hh ? (r->dst.hh->hh_output ==
 				       dev_queue_xmit) : 0,
 			r->rt_spec_dst, &len);
 
@@ -608,13 +608,13 @@ static inline int ip_rt_proc_init(void)
 
 static inline void rt_free(struct rtable *rt)
 {
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static inline void rt_drop(struct rtable *rt)
 {
 	ip_rt_put(rt);
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static inline int rt_fast_clean(struct rtable *rth)
@@ -622,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth)
 	/* Kill broadcast/multicast entries very aggresively, if they
 	   collide in hash table with more useful entries */
 	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
-		rth->fl.iif && rth->u.dst.rt_next;
+		rth->fl.iif && rth->dst.rt_next;
 }
 
 static inline int rt_valuable(struct rtable *rth)
 {
 	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-		rth->u.dst.expires;
+		rth->dst.expires;
 }
 
 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -636,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
 	unsigned long age;
 	int ret = 0;
 
-	if (atomic_read(&rth->u.dst.__refcnt))
+	if (atomic_read(&rth->dst.__refcnt))
 		goto out;
 
 	ret = 1;
-	if (rth->u.dst.expires &&
-	    time_after_eq(jiffies, rth->u.dst.expires))
+	if (rth->dst.expires &&
+	    time_after_eq(jiffies, rth->dst.expires))
 		goto out;
 
-	age = jiffies - rth->u.dst.lastuse;
+	age = jiffies - rth->dst.lastuse;
 	ret = 0;
 	if ((age <= tmo1 && !rt_fast_clean(rth)) ||
 	    (age <= tmo2 && rt_valuable(rth)))
@@ -660,7 +660,7 @@ out:	return ret;
  */
 static inline u32 rt_score(struct rtable *rt)
 {
-	u32 score = jiffies - rt->u.dst.lastuse;
+	u32 score = jiffies - rt->dst.lastuse;
 
 	score = ~score & ~(3<<30);
 
@@ -700,12 +700,12 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 
 static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
 {
-	return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev));
+	return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev));
 }
 
 static inline int rt_is_expired(struct rtable *rth)
 {
-	return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev));
+	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
 }
 
 /*
@@ -734,7 +734,7 @@ static void rt_do_flush(int process_context)
 		rth = rt_hash_table[i].chain;
 
 		/* defer releasing the head of the list after spin_unlock */
-		for (tail = rth; tail; tail = tail->u.dst.rt_next)
+		for (tail = rth; tail; tail = tail->dst.rt_next)
 			if (!rt_is_expired(tail))
 				break;
 		if (rth != tail)
@@ -743,9 +743,9 @@ static void rt_do_flush(int process_context)
 		/* call rt_free on entries after the tail requiring flush */
 		prev = &rt_hash_table[i].chain;
 		for (p = *prev; p; p = next) {
-			next = p->u.dst.rt_next;
+			next = p->dst.rt_next;
 			if (!rt_is_expired(p)) {
-				prev = &p->u.dst.rt_next;
+				prev = &p->dst.rt_next;
 			} else {
 				*prev = next;
 				rt_free(p);
@@ -760,7 +760,7 @@ static void rt_do_flush(int process_context)
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
 		for (; rth != tail; rth = next) {
-			next = rth->u.dst.rt_next;
+			next = rth->dst.rt_next;
 			rt_free(rth);
 		}
 	}
@@ -791,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
 	while (aux != rth) {
 		if (compare_hash_inputs(&aux->fl, &rth->fl))
 			return 0;
-		aux = aux->u.dst.rt_next;
+		aux = aux->dst.rt_next;
 	}
 	return ONE;
 }
@@ -831,18 +831,18 @@ static void rt_check_expire(void)
 		length = 0;
 		spin_lock_bh(rt_hash_lock_addr(i));
 		while ((rth = *rthp) != NULL) {
-			prefetch(rth->u.dst.rt_next);
+			prefetch(rth->dst.rt_next);
 			if (rt_is_expired(rth)) {
-				*rthp = rth->u.dst.rt_next;
+				*rthp = rth->dst.rt_next;
 				rt_free(rth);
 				continue;
 			}
-			if (rth->u.dst.expires) {
+			if (rth->dst.expires) {
 				/* Entry is expired even if it is in use */
-				if (time_before_eq(jiffies, rth->u.dst.expires)) {
+				if (time_before_eq(jiffies, rth->dst.expires)) {
 nofree:
 					tmo >>= 1;
-					rthp = &rth->u.dst.rt_next;
+					rthp = &rth->dst.rt_next;
 					/*
 					 * We only count entries on
 					 * a chain with equal hash inputs once
@@ -858,7 +858,7 @@ nofree:
 				goto nofree;
 
 			/* Cleanup aged off entries. */
-			*rthp = rth->u.dst.rt_next;
+			*rthp = rth->dst.rt_next;
 			rt_free(rth);
 		}
 		spin_unlock_bh(rt_hash_lock_addr(i));
@@ -999,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops)
 				if (!rt_is_expired(rth) &&
 					!rt_may_expire(rth, tmo, expire)) {
 					tmo >>= 1;
-					rthp = &rth->u.dst.rt_next;
+					rthp = &rth->dst.rt_next;
 					continue;
 				}
-				*rthp = rth->u.dst.rt_next;
+				*rthp = rth->dst.rt_next;
 				rt_free(rth);
 				goal--;
 			}
@@ -1068,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head)
 
 	while (rth) {
 		length += has_noalias(head, rth);
-		rth = rth->u.dst.rt_next;
+		rth = rth->dst.rt_next;
 	}
 	return length >> FRACT_BITS;
 }
@@ -1090,7 +1090,7 @@ restart:
 	candp = NULL;
 	now = jiffies;
 
-	if (!rt_caching(dev_net(rt->u.dst.dev))) {
+	if (!rt_caching(dev_net(rt->dst.dev))) {
 		/*
 		 * If we're not caching, just tell the caller we
 		 * were successful and don't touch the route.  The
@@ -1108,7 +1108,7 @@ restart:
 		 */
 
 		if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
-			int err = arp_bind_neighbour(&rt->u.dst);
+			int err = arp_bind_neighbour(&rt->dst);
 			if (err) {
 				if (net_ratelimit())
 					printk(KERN_WARNING
@@ -1127,19 +1127,19 @@ restart:
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
 		if (rt_is_expired(rth)) {
-			*rthp = rth->u.dst.rt_next;
+			*rthp = rth->dst.rt_next;
 			rt_free(rth);
 			continue;
 		}
 		if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
 			/* Put it first */
-			*rthp = rth->u.dst.rt_next;
+			*rthp = rth->dst.rt_next;
 			/*
 			 * Since lookup is lockfree, the deletion
 			 * must be visible to another weakly ordered CPU before
 			 * the insertion at the start of the hash chain.
 			 */
-			rcu_assign_pointer(rth->u.dst.rt_next,
+			rcu_assign_pointer(rth->dst.rt_next,
 					   rt_hash_table[hash].chain);
 			/*
 			 * Since lookup is lockfree, the update writes
@@ -1147,18 +1147,18 @@ restart:
 			 */
 			rcu_assign_pointer(rt_hash_table[hash].chain, rth);
 
-			dst_use(&rth->u.dst, now);
+			dst_use(&rth->dst, now);
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
 			rt_drop(rt);
 			if (rp)
 				*rp = rth;
 			else
-				skb_dst_set(skb, &rth->u.dst);
+				skb_dst_set(skb, &rth->dst);
 			return 0;
 		}
 
-		if (!atomic_read(&rth->u.dst.__refcnt)) {
+		if (!atomic_read(&rth->dst.__refcnt)) {
 			u32 score = rt_score(rth);
 
 			if (score <= min_score) {
@@ -1170,7 +1170,7 @@ restart:
 
 		chain_length++;
 
-		rthp = &rth->u.dst.rt_next;
+		rthp = &rth->dst.rt_next;
 	}
 
 	if (cand) {
@@ -1181,17 +1181,17 @@ restart:
 		 * only 2 entries per bucket. We will see.
 		 */
 		if (chain_length > ip_rt_gc_elasticity) {
-			*candp = cand->u.dst.rt_next;
+			*candp = cand->dst.rt_next;
 			rt_free(cand);
 		}
 	} else {
 		if (chain_length > rt_chain_length_max &&
 		    slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
-			struct net *net = dev_net(rt->u.dst.dev);
+			struct net *net = dev_net(rt->dst.dev);
 			int num = ++net->ipv4.current_rt_cache_rebuild_count;
 			if (!rt_caching(net)) {
 				printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
-					rt->u.dst.dev->name, num);
+					rt->dst.dev->name, num);
 			}
 			rt_emergency_hash_rebuild(net);
 			spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1206,7 +1206,7 @@ restart:
 	   route or unicast forwarding path.
 	 */
 	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
-		int err = arp_bind_neighbour(&rt->u.dst);
+		int err = arp_bind_neighbour(&rt->dst);
 		if (err) {
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
@@ -1237,14 +1237,14 @@ restart:
 		}
 	}
 
-	rt->u.dst.rt_next = rt_hash_table[hash].chain;
+	rt->dst.rt_next = rt_hash_table[hash].chain;
 
 #if RT_CACHE_DEBUG >= 2
-	if (rt->u.dst.rt_next) {
+	if (rt->dst.rt_next) {
 		struct rtable *trt;
 		printk(KERN_DEBUG "rt_cache @%02x: %pI4",
 		       hash, &rt->rt_dst);
-		for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
+		for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
 			printk(" . %pI4", &trt->rt_dst);
 		printk("\n");
 	}
@@ -1262,7 +1262,7 @@ skip_hashing:
 	if (rp)
 		*rp = rt;
 	else
-		skb_dst_set(skb, &rt->u.dst);
+		skb_dst_set(skb, &rt->dst);
 	return 0;
 }
 
@@ -1334,11 +1334,11 @@ static void rt_del(unsigned hash, struct rtable *rt)
 	ip_rt_put(rt);
 	while ((aux = *rthp) != NULL) {
 		if (aux == rt || rt_is_expired(aux)) {
-			*rthp = aux->u.dst.rt_next;
+			*rthp = aux->dst.rt_next;
 			rt_free(aux);
 			continue;
 		}
-		rthp = &aux->u.dst.rt_next;
+		rthp = &aux->dst.rt_next;
 	}
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 }
@@ -1392,19 +1392,19 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				    rth->fl.oif != ikeys[k] ||
 				    rth->fl.iif != 0 ||
 				    rt_is_expired(rth) ||
-				    !net_eq(dev_net(rth->u.dst.dev), net)) {
-					rthp = &rth->u.dst.rt_next;
+				    !net_eq(dev_net(rth->dst.dev), net)) {
+					rthp = &rth->dst.rt_next;
 					continue;
 				}
 
 				if (rth->rt_dst != daddr ||
 				    rth->rt_src != saddr ||
-				    rth->u.dst.error ||
+				    rth->dst.error ||
 				    rth->rt_gateway != old_gw ||
-				    rth->u.dst.dev != dev)
+				    rth->dst.dev != dev)
 					break;
 
-				dst_hold(&rth->u.dst);
+				dst_hold(&rth->dst);
 
 				rt = dst_alloc(&ipv4_dst_ops);
 				if (rt == NULL) {
@@ -1414,20 +1414,20 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 				/* Copy all the information. */
 				*rt = *rth;
-				rt->u.dst.__use		= 1;
-				atomic_set(&rt->u.dst.__refcnt, 1);
-				rt->u.dst.child		= NULL;
-				if (rt->u.dst.dev)
-					dev_hold(rt->u.dst.dev);
+				rt->dst.__use		= 1;
+				atomic_set(&rt->dst.__refcnt, 1);
+				rt->dst.child		= NULL;
+				if (rt->dst.dev)
+					dev_hold(rt->dst.dev);
 				if (rt->idev)
 					in_dev_hold(rt->idev);
-				rt->u.dst.obsolete	= -1;
-				rt->u.dst.lastuse	= jiffies;
-				rt->u.dst.path		= &rt->u.dst;
-				rt->u.dst.neighbour	= NULL;
-				rt->u.dst.hh		= NULL;
+				rt->dst.obsolete	= -1;
+				rt->dst.lastuse	= jiffies;
+				rt->dst.path		= &rt->dst;
+				rt->dst.neighbour	= NULL;
+				rt->dst.hh		= NULL;
 #ifdef CONFIG_XFRM
-				rt->u.dst.xfrm		= NULL;
+				rt->dst.xfrm		= NULL;
 #endif
 				rt->rt_genid		= rt_genid(net);
 				rt->rt_flags		|= RTCF_REDIRECTED;
@@ -1436,23 +1436,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				rt->rt_gateway		= new_gw;
 
 				/* Redirect received -> path was valid */
-				dst_confirm(&rth->u.dst);
+				dst_confirm(&rth->dst);
 
 				if (rt->peer)
 					atomic_inc(&rt->peer->refcnt);
 
-				if (arp_bind_neighbour(&rt->u.dst) ||
-				    !(rt->u.dst.neighbour->nud_state &
+				if (arp_bind_neighbour(&rt->dst) ||
+				    !(rt->dst.neighbour->nud_state &
 					    NUD_VALID)) {
-					if (rt->u.dst.neighbour)
-						neigh_event_send(rt->u.dst.neighbour, NULL);
+					if (rt->dst.neighbour)
+						neigh_event_send(rt->dst.neighbour, NULL);
 					ip_rt_put(rth);
 					rt_drop(rt);
 					goto do_next;
 				}
 
-				netevent.old = &rth->u.dst;
-				netevent.new = &rt->u.dst;
+				netevent.old = &rth->dst;
+				netevent.new = &rt->dst;
 				call_netevent_notifiers(NETEVENT_REDIRECT,
 							&netevent);
 
@@ -1488,8 +1488,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 			ip_rt_put(rt);
 			ret = NULL;
 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
-			   (rt->u.dst.expires &&
-			    time_after_eq(jiffies, rt->u.dst.expires))) {
+			   (rt->dst.expires &&
+			    time_after_eq(jiffies, rt->dst.expires))) {
 			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
 						rt->fl.oif,
 						rt_genid(dev_net(dst->dev)));
@@ -1527,7 +1527,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	int log_martians;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	in_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
 		rcu_read_unlock();
 		return;
@@ -1538,30 +1538,30 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	/* No redirected packets during ip_rt_redirect_silence;
 	 * reset the algorithm.
 	 */
-	if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence))
-		rt->u.dst.rate_tokens = 0;
+	if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
+		rt->dst.rate_tokens = 0;
 
 	/* Too many ignored redirects; do not send anything
-	 * set u.dst.rate_last to the last seen redirected packet.
+	 * set dst.rate_last to the last seen redirected packet.
 	 */
-	if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
-		rt->u.dst.rate_last = jiffies;
+	if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
+		rt->dst.rate_last = jiffies;
 		return;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
 	 * redirect.
 	 */
-	if (rt->u.dst.rate_tokens == 0 ||
+	if (rt->dst.rate_tokens == 0 ||
 	    time_after(jiffies,
-		       (rt->u.dst.rate_last +
-			(ip_rt_redirect_load << rt->u.dst.rate_tokens)))) {
+		       (rt->dst.rate_last +
+			(ip_rt_redirect_load << rt->dst.rate_tokens)))) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
-		rt->u.dst.rate_last = jiffies;
-		++rt->u.dst.rate_tokens;
+		rt->dst.rate_last = jiffies;
+		++rt->dst.rate_tokens;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
 		if (log_martians &&
-		    rt->u.dst.rate_tokens == ip_rt_redirect_number &&
+		    rt->dst.rate_tokens == ip_rt_redirect_number &&
 		    net_ratelimit())
 			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
 				&rt->rt_src, rt->rt_iif,
@@ -1576,7 +1576,7 @@ static int ip_error(struct sk_buff *skb)
 	unsigned long now;
 	int code;
 
-	switch (rt->u.dst.error) {
+	switch (rt->dst.error) {
 		case EINVAL:
 		default:
 			goto out;
@@ -1585,7 +1585,7 @@ static int ip_error(struct sk_buff *skb)
 			break;
 		case ENETUNREACH:
 			code = ICMP_NET_UNREACH;
-			IP_INC_STATS_BH(dev_net(rt->u.dst.dev),
+			IP_INC_STATS_BH(dev_net(rt->dst.dev),
 					IPSTATS_MIB_INNOROUTES);
 			break;
 		case EACCES:
@@ -1594,12 +1594,12 @@ static int ip_error(struct sk_buff *skb)
 	}
 
 	now = jiffies;
-	rt->u.dst.rate_tokens += now - rt->u.dst.rate_last;
-	if (rt->u.dst.rate_tokens > ip_rt_error_burst)
-		rt->u.dst.rate_tokens = ip_rt_error_burst;
-	rt->u.dst.rate_last = now;
-	if (rt->u.dst.rate_tokens >= ip_rt_error_cost) {
-		rt->u.dst.rate_tokens -= ip_rt_error_cost;
+	rt->dst.rate_tokens += now - rt->dst.rate_last;
+	if (rt->dst.rate_tokens > ip_rt_error_burst)
+		rt->dst.rate_tokens = ip_rt_error_burst;
+	rt->dst.rate_last = now;
+	if (rt->dst.rate_tokens >= ip_rt_error_cost) {
+		rt->dst.rate_tokens -= ip_rt_error_cost;
 		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
 	}
 
@@ -1644,7 +1644,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 			rcu_read_lock();
 			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-			     rth = rcu_dereference(rth->u.dst.rt_next)) {
+			     rth = rcu_dereference(rth->dst.rt_next)) {
 				unsigned short mtu = new_mtu;
 
 				if (rth->fl.fl4_dst != daddr ||
@@ -1653,8 +1653,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 				    rth->rt_src != iph->saddr ||
 				    rth->fl.oif != ikeys[k] ||
 				    rth->fl.iif != 0 ||
-				    dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
-				    !net_eq(dev_net(rth->u.dst.dev), net) ||
+				    dst_metric_locked(&rth->dst, RTAX_MTU) ||
+				    !net_eq(dev_net(rth->dst.dev), net) ||
 				    rt_is_expired(rth))
 					continue;
 
@@ -1662,22 +1662,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 					/* BSD 4.2 compatibility hack :-( */
 					if (mtu == 0 &&
-					    old_mtu >= dst_mtu(&rth->u.dst) &&
+					    old_mtu >= dst_mtu(&rth->dst) &&
 					    old_mtu >= 68 + (iph->ihl << 2))
 						old_mtu -= iph->ihl << 2;
 
 					mtu = guess_mtu(old_mtu);
 				}
-				if (mtu <= dst_mtu(&rth->u.dst)) {
-					if (mtu < dst_mtu(&rth->u.dst)) {
-						dst_confirm(&rth->u.dst);
+				if (mtu <= dst_mtu(&rth->dst)) {
+					if (mtu < dst_mtu(&rth->dst)) {
+						dst_confirm(&rth->dst);
 						if (mtu < ip_rt_min_pmtu) {
 							mtu = ip_rt_min_pmtu;
-							rth->u.dst.metrics[RTAX_LOCK-1] |=
+							rth->dst.metrics[RTAX_LOCK-1] |=
 								(1 << RTAX_MTU);
 						}
-						rth->u.dst.metrics[RTAX_MTU-1] = mtu;
-						dst_set_expires(&rth->u.dst,
+						rth->dst.metrics[RTAX_MTU-1] = mtu;
+						dst_set_expires(&rth->dst,
 							ip_rt_mtu_expires);
 					}
 					est_mtu = mtu;
@@ -1750,7 +1750,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
 
 	rt = skb_rtable(skb);
 	if (rt)
-		dst_set_expires(&rt->u.dst, 0);
+		dst_set_expires(&rt->dst, 0);
 }
 
 static int ip_rt_bug(struct sk_buff *skb)
@@ -1778,11 +1778,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
 	if (rt->fl.iif == 0)
 		src = rt->rt_src;
-	else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) {
+	else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
 		src = FIB_RES_PREFSRC(res);
 		fib_res_put(&res);
 	} else
-		src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
+		src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
 					RT_SCOPE_UNIVERSE);
 	memcpy(addr, &src, 4);
 }
@@ -1790,10 +1790,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 #ifdef CONFIG_NET_CLS_ROUTE
 static void set_class_tag(struct rtable *rt, u32 tag)
 {
-	if (!(rt->u.dst.tclassid & 0xFFFF))
-		rt->u.dst.tclassid |= tag & 0xFFFF;
-	if (!(rt->u.dst.tclassid & 0xFFFF0000))
-		rt->u.dst.tclassid |= tag & 0xFFFF0000;
+	if (!(rt->dst.tclassid & 0xFFFF))
+		rt->dst.tclassid |= tag & 0xFFFF;
+	if (!(rt->dst.tclassid & 0xFFFF0000))
+		rt->dst.tclassid |= tag & 0xFFFF0000;
 }
 #endif
 
@@ -1805,30 +1805,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 		if (FIB_RES_GW(*res) &&
 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = FIB_RES_GW(*res);
-		memcpy(rt->u.dst.metrics, fi->fib_metrics,
-		       sizeof(rt->u.dst.metrics));
+		memcpy(rt->dst.metrics, fi->fib_metrics,
+		       sizeof(rt->dst.metrics));
 		if (fi->fib_mtu == 0) {
-			rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
-			if (dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
+			rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
+			if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
 			    rt->rt_gateway != rt->rt_dst &&
-			    rt->u.dst.dev->mtu > 576)
-				rt->u.dst.metrics[RTAX_MTU-1] = 576;
+			    rt->dst.dev->mtu > 576)
+				rt->dst.metrics[RTAX_MTU-1] = 576;
 		}
 #ifdef CONFIG_NET_CLS_ROUTE
-		rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+		rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
 	} else
-		rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
-
-	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
-		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
-	if (dst_mtu(&rt->u.dst) > IP_MAX_MTU)
-		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
-	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0)
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
+		rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
+
+	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
+		rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
+	if (dst_mtu(&rt->dst) > IP_MAX_MTU)
+		rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
+	if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
+		rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
 				       ip_rt_min_advmss);
-	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40)
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
+	if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
+		rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
 
 #ifdef CONFIG_NET_CLS_ROUTE
 #ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1873,13 +1873,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (!rth)
 		goto e_nobufs;
 
-	rth->u.dst.output = ip_rt_bug;
-	rth->u.dst.obsolete = -1;
+	rth->dst.output = ip_rt_bug;
+	rth->dst.obsolete = -1;
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
 	rth->fl.fl4_tos	= tos;
@@ -1887,13 +1887,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
-	rth->u.dst.tclassid = itag;
+	rth->dst.tclassid = itag;
 #endif
 	rth->rt_iif	=
 	rth->fl.iif	= dev->ifindex;
-	rth->u.dst.dev	= init_net.loopback_dev;
-	dev_hold(rth->u.dst.dev);
-	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->dst.dev	= init_net.loopback_dev;
+	dev_hold(rth->dst.dev);
+	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->fl.oif	= 0;
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
@@ -1901,13 +1901,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->rt_flags	= RTCF_MULTICAST;
 	rth->rt_type	= RTN_MULTICAST;
 	if (our) {
-		rth->u.dst.input= ip_local_deliver;
+		rth->dst.input= ip_local_deliver;
 		rth->rt_flags |= RTCF_LOCAL;
 	}
 
 #ifdef CONFIG_IP_MROUTE
 	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
-		rth->u.dst.input = ip_mr_input;
+		rth->dst.input = ip_mr_input;
 #endif
 	RT_CACHE_STAT_INC(in_slow_mc);
 
@@ -2016,12 +2016,12 @@ static int __mkroute_input(struct sk_buff *skb,
 		goto cleanup;
 	}
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
-		rth->u.dst.flags |= DST_NOXFRM;
+		rth->dst.flags |= DST_NOXFRM;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
 	rth->fl.fl4_tos	= tos;
@@ -2031,16 +2031,16 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_gateway	= daddr;
 	rth->rt_iif 	=
 		rth->fl.iif	= in_dev->dev->ifindex;
-	rth->u.dst.dev	= (out_dev)->dev;
-	dev_hold(rth->u.dst.dev);
-	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->dst.dev	= (out_dev)->dev;
+	dev_hold(rth->dst.dev);
+	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->fl.oif 	= 0;
 	rth->rt_spec_dst= spec_dst;
 
-	rth->u.dst.obsolete = -1;
-	rth->u.dst.input = ip_forward;
-	rth->u.dst.output = ip_output;
-	rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
+	rth->dst.obsolete = -1;
+	rth->dst.input = ip_forward;
+	rth->dst.output = ip_output;
+	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
 
 	rt_set_nexthop(rth, res, itag);
 
@@ -2074,7 +2074,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
 
 	/* put it into the cache */
 	hash = rt_hash(daddr, saddr, fl->iif,
-		       rt_genid(dev_net(rth->u.dst.dev)));
+		       rt_genid(dev_net(rth->dst.dev)));
 	return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
 }
 
@@ -2197,14 +2197,14 @@ local_input:
 	if (!rth)
 		goto e_nobufs;
 
-	rth->u.dst.output= ip_rt_bug;
-	rth->u.dst.obsolete = -1;
+	rth->dst.output= ip_rt_bug;
+	rth->dst.obsolete = -1;
 	rth->rt_genid = rt_genid(net);
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
 	rth->fl.fl4_tos	= tos;
@@ -2212,20 +2212,20 @@ local_input:
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
-	rth->u.dst.tclassid = itag;
+	rth->dst.tclassid = itag;
 #endif
 	rth->rt_iif	=
 	rth->fl.iif	= dev->ifindex;
-	rth->u.dst.dev	= net->loopback_dev;
-	dev_hold(rth->u.dst.dev);
-	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->dst.dev	= net->loopback_dev;
+	dev_hold(rth->dst.dev);
+	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
-	rth->u.dst.input= ip_local_deliver;
+	rth->dst.input= ip_local_deliver;
 	rth->rt_flags 	= flags|RTCF_LOCAL;
 	if (res.type == RTN_UNREACHABLE) {
-		rth->u.dst.input= ip_error;
-		rth->u.dst.error= -err;
+		rth->dst.input= ip_error;
+		rth->dst.error= -err;
 		rth->rt_flags 	&= ~RTCF_LOCAL;
 	}
 	rth->rt_type	= res.type;
@@ -2291,21 +2291,21 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	hash = rt_hash(daddr, saddr, iif, rt_genid(net));
 
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-	     rth = rcu_dereference(rth->u.dst.rt_next)) {
+	     rth = rcu_dereference(rth->dst.rt_next)) {
 		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
 		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
 		     (rth->fl.iif ^ iif) |
 		     rth->fl.oif |
 		     (rth->fl.fl4_tos ^ tos)) == 0 &&
 		    rth->fl.mark == skb->mark &&
-		    net_eq(dev_net(rth->u.dst.dev), net) &&
+		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
 			if (noref) {
-				dst_use_noref(&rth->u.dst, jiffies);
-				skb_dst_set_noref(skb, &rth->u.dst);
+				dst_use_noref(&rth->dst, jiffies);
+				skb_dst_set_noref(skb, &rth->dst);
 			} else {
-				dst_use(&rth->u.dst, jiffies);
-				skb_dst_set(skb, &rth->u.dst);
+				dst_use(&rth->dst, jiffies);
+				skb_dst_set(skb, &rth->dst);
 			}
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
@@ -2412,12 +2412,12 @@ static int __mkroute_output(struct rtable **result,
 		goto cleanup;
 	}
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
-		rth->u.dst.flags |= DST_NOXFRM;
+		rth->dst.flags |= DST_NOXFRM;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 
 	rth->fl.fl4_dst	= oldflp->fl4_dst;
 	rth->fl.fl4_tos	= tos;
@@ -2429,35 +2429,35 @@ static int __mkroute_output(struct rtable **result,
 	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
 	/* get references to the devices that are to be hold by the routing
 	   cache entry */
-	rth->u.dst.dev	= dev_out;
+	rth->dst.dev	= dev_out;
 	dev_hold(dev_out);
 	rth->idev	= in_dev_get(dev_out);
 	rth->rt_gateway = fl->fl4_dst;
 	rth->rt_spec_dst= fl->fl4_src;
 
-	rth->u.dst.output=ip_output;
-	rth->u.dst.obsolete = -1;
+	rth->dst.output=ip_output;
+	rth->dst.obsolete = -1;
 	rth->rt_genid = rt_genid(dev_net(dev_out));
 
 	RT_CACHE_STAT_INC(out_slow_tot);
 
 	if (flags & RTCF_LOCAL) {
-		rth->u.dst.input = ip_local_deliver;
+		rth->dst.input = ip_local_deliver;
 		rth->rt_spec_dst = fl->fl4_dst;
 	}
 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
 		rth->rt_spec_dst = fl->fl4_src;
 		if (flags & RTCF_LOCAL &&
 		    !(dev_out->flags & IFF_LOOPBACK)) {
-			rth->u.dst.output = ip_mc_output;
+			rth->dst.output = ip_mc_output;
 			RT_CACHE_STAT_INC(out_slow_mc);
 		}
 #ifdef CONFIG_IP_MROUTE
 		if (res->type == RTN_MULTICAST) {
 			if (IN_DEV_MFORWARD(in_dev) &&
 			    !ipv4_is_local_multicast(oldflp->fl4_dst)) {
-				rth->u.dst.input = ip_mr_input;
-				rth->u.dst.output = ip_mc_output;
+				rth->dst.input = ip_mr_input;
+				rth->dst.output = ip_mc_output;
 			}
 		}
 #endif
@@ -2712,7 +2712,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 
 	rcu_read_lock_bh();
 	for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
-		rth = rcu_dereference_bh(rth->u.dst.rt_next)) {
+		rth = rcu_dereference_bh(rth->dst.rt_next)) {
 		if (rth->fl.fl4_dst == flp->fl4_dst &&
 		    rth->fl.fl4_src == flp->fl4_src &&
 		    rth->fl.iif == 0 &&
@@ -2720,9 +2720,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 		    rth->fl.mark == flp->mark &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK)) &&
-		    net_eq(dev_net(rth->u.dst.dev), net) &&
+		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
-			dst_use(&rth->u.dst, jiffies);
+			dst_use(&rth->dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
 			*rp = rth;
@@ -2759,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		dst_alloc(&ipv4_dst_blackhole_ops);
 
 	if (rt) {
-		struct dst_entry *new = &rt->u.dst;
+		struct dst_entry *new = &rt->dst;
 
 		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
-		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
+		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
 
-		new->dev = ort->u.dst.dev;
+		new->dev = ort->dst.dev;
 		if (new->dev)
 			dev_hold(new->dev);
 
@@ -2791,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		dst_free(new);
 	}
 
-	dst_release(&(*rp)->u.dst);
+	dst_release(&(*rp)->dst);
 	*rp = rt;
 	return (rt ? 0 : -ENOMEM);
 }
@@ -2861,11 +2861,11 @@ static int rt_fill_info(struct net *net,
 		r->rtm_src_len = 32;
 		NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
 	}
-	if (rt->u.dst.dev)
-		NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
+	if (rt->dst.dev)
+		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
 #ifdef CONFIG_NET_CLS_ROUTE
-	if (rt->u.dst.tclassid)
-		NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
+	if (rt->dst.tclassid)
+		NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
 	if (rt->fl.iif)
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
@@ -2875,11 +2875,11 @@ static int rt_fill_info(struct net *net,
 	if (rt->rt_dst != rt->rt_gateway)
 		NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
 
-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto nla_put_failure;
 
-	error = rt->u.dst.error;
-	expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
+	error = rt->dst.error;
+	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
 	if (rt->peer) {
 		id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
 		if (rt->peer->tcp_ts_stamp) {
@@ -2911,7 +2911,7 @@ static int rt_fill_info(struct net *net,
 			NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
 	}
 
-	if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage,
+	if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
 			       expires, error) < 0)
 		goto nla_put_failure;
 
@@ -2976,8 +2976,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 		local_bh_enable();
 
 		rt = skb_rtable(skb);
-		if (err == 0 && rt->u.dst.error)
-			err = -rt->u.dst.error;
+		if (err == 0 && rt->dst.error)
+			err = -rt->dst.error;
 	} else {
 		struct flowi fl = {
 			.nl_u = {
@@ -2995,7 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	if (err)
 		goto errout_free;
 
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
@@ -3031,12 +3031,12 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 			continue;
 		rcu_read_lock_bh();
 		for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
-		     rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) {
-			if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
+		     rt = rcu_dereference_bh(rt->dst.rt_next), idx++) {
+			if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx)
 				continue;
 			if (rt_is_expired(rt))
 				continue;
-			skb_dst_set_noref(skb, &rt->u.dst);
+			skb_dst_set_noref(skb, &rt->dst);
 			if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
 					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					 1, NLM_F_MULTI) <= 0) {
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5c48124332de..02bef6aa8b30 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -354,15 +354,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	}
 
 	/* Try to redo what tcp_v4_send_synack did. */
-	req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW);
+	req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
 
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
 				  ireq->wscale_ok, &rcv_wscale,
-				  dst_metric(&rt->u.dst, RTAX_INITRWND));
+				  dst_metric(&rt->dst, RTAX_INITRWND));
 
 	ireq->rcv_wscale  = rcv_wscale;
 
-	ret = get_cookie_sock(sk, skb, req, &rt->u.dst);
+	ret = get_cookie_sock(sk, skb, req, &rt->dst);
 out:	return ret;
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7f976af27bf0..7f9515c0379f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -237,7 +237,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	/* OK, now commit destination to socket.  */
 	sk->sk_gso_type = SKB_GSO_TCPV4;
-	sk_setup_caps(sk, &rt->u.dst);
+	sk_setup_caps(sk, &rt->dst);
 
 	if (!tp->write_seq)
 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index eec4ff456e33..32e0bef60d0a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -914,7 +914,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		    !sock_flag(sk, SOCK_BROADCAST))
 			goto out;
 		if (connected)
-			sk_dst_set(sk, dst_clone(&rt->u.dst));
+			sk_dst_set(sk, dst_clone(&rt->dst));
 	}
 
 	if (msg->msg_flags&MSG_CONFIRM)
@@ -978,7 +978,7 @@ out:
 	return err;
 
 do_confirm:
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);
 	if (!(msg->msg_flags&MSG_PROBE) || len)
 		goto back_from_confirm;
 	err = 0;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1705476670ef..349327092c9e 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -37,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
 		fl.fl4_src = saddr->a4;
 
 	err = __ip_route_output_key(net, &rt, &fl);
-	dst = &rt->u.dst;
+	dst = &rt->dst;
 	if (err)
 		dst = ERR_PTR(err);
 	return dst;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1a698df5706..b97bb1f30808 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -557,7 +557,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 		pr_warning("Freeing alive inet6 address %p\n", ifp);
 		return;
 	}
-	dst_release(&ifp->rt->u.dst);
+	dst_release(&ifp->rt->dst);
 
 	call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu);
 }
@@ -823,7 +823,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 				rt->rt6i_flags |= RTF_EXPIRES;
 			}
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	}
 
 out:
@@ -1863,7 +1863,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 					      dev, expires, flags);
 		}
 		if (rt)
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 	}
 
 	/* Try to figure out our local address for this prefix */
@@ -4093,11 +4093,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 		if (ifp->idev->cnf.forwarding)
 			addrconf_leave_anycast(ifp);
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
-		dst_hold(&ifp->rt->u.dst);
+		dst_hold(&ifp->rt->dst);
 
 		if (ifp->state == INET6_IFADDR_STATE_DEAD &&
 		    ip6_del_rt(ifp->rt))
-			dst_free(&ifp->rt->u.dst);
+			dst_free(&ifp->rt->dst);
 		break;
 	}
 }
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index f058fbd808c8..0e5e943446f0 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -84,7 +84,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 		rt = rt6_lookup(net, addr, NULL, 0, 0);
 		if (rt) {
 			dev = rt->rt6i_dev;
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 		} else if (ishost) {
 			err = -EADDRNOTAVAIL;
 			goto error;
@@ -244,7 +244,7 @@ static void aca_put(struct ifacaddr6 *ac)
 {
 	if (atomic_dec_and_test(&ac->aca_refcnt)) {
 		in6_dev_put(ac->aca_idev);
-		dst_release(&ac->aca_rt->u.dst);
+		dst_release(&ac->aca_rt->dst);
 		kfree(ac);
 	}
 }
@@ -350,7 +350,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
-	dst_hold(&aca->aca_rt->u.dst);
+	dst_hold(&aca->aca_rt->dst);
 	ip6_del_rt(aca->aca_rt);
 
 	aca_put(aca);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 8e44f8f9c188..b1108ede18e1 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -43,8 +43,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
 	if (arg.result)
 		return arg.result;
 
-	dst_hold(&net->ipv6.ip6_null_entry->u.dst);
-	return &net->ipv6.ip6_null_entry->u.dst;
+	dst_hold(&net->ipv6.ip6_null_entry->dst);
+	return &net->ipv6.ip6_null_entry->dst;
 }
 
 static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
@@ -86,7 +86,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 			struct in6_addr saddr;
 
 			if (ipv6_dev_get_saddr(net,
-					       ip6_dst_idev(&rt->u.dst)->dev,
+					       ip6_dst_idev(&rt->dst)->dev,
 					       &flp->fl6_dst,
 					       rt6_flags2srcprefs(flags),
 					       &saddr))
@@ -99,12 +99,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 		goto out;
 	}
 again:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	rt = NULL;
 	goto out;
 
 discard_pkt:
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 out:
 	arg->result = rt;
 	return rt == NULL ? -EAGAIN : 0;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 92a122b7795d..b6a585909d35 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -165,7 +165,7 @@ static __inline__ void node_free(struct fib6_node * fn)
 static __inline__ void rt6_release(struct rt6_info *rt)
 {
 	if (atomic_dec_and_test(&rt->rt6i_ref))
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 }
 
 static void fib6_link_table(struct net *net, struct fib6_table *tb)
@@ -278,7 +278,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)
 	int res;
 	struct rt6_info *rt;
 
-	for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
 		res = rt6_dump_route(rt, w->args);
 		if (res < 0) {
 			/* Frame is full, suspend walking */
@@ -619,7 +619,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 
 	ins = &fn->leaf;
 
-	for (iter = fn->leaf; iter; iter=iter->u.dst.rt6_next) {
+	for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) {
 		/*
 		 *	Search for duplicates
 		 */
@@ -647,7 +647,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		if (iter->rt6i_metric > rt->rt6i_metric)
 			break;
 
-		ins = &iter->u.dst.rt6_next;
+		ins = &iter->dst.rt6_next;
 	}
 
 	/* Reset round-robin state, if necessary */
@@ -658,7 +658,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 	 *	insert node
 	 */
 
-	rt->u.dst.rt6_next = iter;
+	rt->dst.rt6_next = iter;
 	*ins = rt;
 	rt->rt6i_node = fn;
 	atomic_inc(&rt->rt6i_ref);
@@ -799,7 +799,7 @@ out:
 			atomic_inc(&pn->leaf->rt6i_ref);
 		}
 #endif
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 	}
 	return err;
 
@@ -810,7 +810,7 @@ out:
 st_failure:
 	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
 		fib6_repair_tree(info->nl_net, fn);
-	dst_free(&rt->u.dst);
+	dst_free(&rt->dst);
 	return err;
 #endif
 }
@@ -1108,7 +1108,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	RT6_TRACE("fib6_del_route\n");
 
 	/* Unlink it */
-	*rtp = rt->u.dst.rt6_next;
+	*rtp = rt->dst.rt6_next;
 	rt->rt6i_node = NULL;
 	net->ipv6.rt6_stats->fib_rt_entries--;
 	net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1122,14 +1122,14 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	FOR_WALKERS(w) {
 		if (w->state == FWS_C && w->leaf == rt) {
 			RT6_TRACE("walker %p adjusted by delroute\n", w);
-			w->leaf = rt->u.dst.rt6_next;
+			w->leaf = rt->dst.rt6_next;
 			if (w->leaf == NULL)
 				w->state = FWS_U;
 		}
 	}
 	read_unlock(&fib6_walker_lock);
 
-	rt->u.dst.rt6_next = NULL;
+	rt->dst.rt6_next = NULL;
 
 	/* If it was last route, expunge its radix tree node */
 	if (fn->leaf == NULL) {
@@ -1168,7 +1168,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 	struct rt6_info **rtp;
 
 #if RT6_DEBUG >= 2
-	if (rt->u.dst.obsolete>0) {
+	if (rt->dst.obsolete>0) {
 		WARN_ON(fn != NULL);
 		return -ENOENT;
 	}
@@ -1195,7 +1195,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 	 *	Walk the leaf entries looking for ourself
 	 */
 
-	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.dst.rt6_next) {
+	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
 		if (*rtp == rt) {
 			fib6_del_route(fn, rtp, info);
 			return 0;
@@ -1334,7 +1334,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
 		.nl_net = c->net,
 	};
 
-	for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
 		res = c->func(rt, c->arg);
 		if (res < 0) {
 			w->leaf = rt;
@@ -1448,8 +1448,8 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 		}
 		gc_args.more++;
 	} else if (rt->rt6i_flags & RTF_CACHE) {
-		if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
-		    time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) {
+		if (atomic_read(&rt->dst.__refcnt) == 0 &&
+		    time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
 			RT6_TRACE("aging clone %p\n", rt);
 			return -1;
 		} else if ((rt->rt6i_flags & RTF_GATEWAY) &&
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 89425af0684c..d40b330c0ee6 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -698,7 +698,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		ipv6_hdr(skb)->payload_len = htons(first_len -
 						   sizeof(struct ipv6hdr));
 
-		dst_hold(&rt->u.dst);
+		dst_hold(&rt->dst);
 
 		for (;;) {
 			/* Prepare header of the next frame,
@@ -726,7 +726,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 			err = output(skb);
 			if(!err)
-				IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
+				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 					      IPSTATS_MIB_FRAGCREATES);
 
 			if (err || !frag)
@@ -740,9 +740,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		kfree(tmp_hdr);
 
 		if (err == 0) {
-			IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
+			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 				      IPSTATS_MIB_FRAGOKS);
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			return 0;
 		}
 
@@ -752,9 +752,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 			frag = skb;
 		}
 
-		IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
+		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 			      IPSTATS_MIB_FRAGFAILS);
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		return err;
 	}
 
@@ -785,7 +785,7 @@ slow_path:
 		 *	Allocate buffer.
 		 */
 
-		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
+		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
 			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 				      IPSTATS_MIB_FRAGFAILS);
@@ -798,7 +798,7 @@ slow_path:
 		 */
 
 		ip6_copy_metadata(frag, skb);
-		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
+		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
 		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 		skb_reset_network_header(frag);
 		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
@@ -1156,24 +1156,24 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 
 			/* need source address above miyazawa*/
 		}
-		dst_hold(&rt->u.dst);
-		inet->cork.dst = &rt->u.dst;
+		dst_hold(&rt->dst);
+		inet->cork.dst = &rt->dst;
 		inet->cork.fl = *fl;
 		np->cork.hop_limit = hlimit;
 		np->cork.tclass = tclass;
 		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
-		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
+		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
 		if (np->frag_size < mtu) {
 			if (np->frag_size)
 				mtu = np->frag_size;
 		}
 		inet->cork.fragsize = mtu;
-		if (dst_allfrag(rt->u.dst.path))
+		if (dst_allfrag(rt->dst.path))
 			inet->cork.flags |= IPCORK_ALLFRAG;
 		inet->cork.length = 0;
 		sk->sk_sndmsg_page = NULL;
 		sk->sk_sndmsg_off = 0;
-		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
+		exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
 			    rt->rt6i_nfheader_len;
 		length += exthdrlen;
 		transhdrlen += exthdrlen;
@@ -1186,7 +1186,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		mtu = inet->cork.fragsize;
 	}
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
 			(opt ? opt->opt_nflen : 0);
@@ -1224,7 +1224,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		}
 
 		if (proto == IPPROTO_UDP &&
-		    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+		    (rt->dst.dev->features & NETIF_F_UFO)) {
 
 			err = ip6_ufo_append_data(sk, getfrag, from, length,
 						  hh_len, fragheaderlen,
@@ -1270,7 +1270,7 @@ alloc_new_skb:
 
 			fraglen = datalen + fragheaderlen;
 			if ((flags & MSG_MORE) &&
-			    !(rt->u.dst.dev->features&NETIF_F_SG))
+			    !(rt->dst.dev->features&NETIF_F_SG))
 				alloclen = mtu;
 			else
 				alloclen = datalen + fragheaderlen;
@@ -1281,7 +1281,7 @@ alloc_new_skb:
 			 * because we have no idea if we're the last one.
 			 */
 			if (datalen == length + fraggap)
-				alloclen += rt->u.dst.trailer_len;
+				alloclen += rt->dst.trailer_len;
 
 			/*
 			 * We just reserve space for fragment header.
@@ -1358,7 +1358,7 @@ alloc_new_skb:
 		if (copy > length)
 			copy = length;
 
-		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+		if (!(rt->dst.dev->features&NETIF_F_SG)) {
 			unsigned int off;
 
 			off = skb->len;
@@ -1503,7 +1503,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
 
-	skb_dst_set(skb, dst_clone(&rt->u.dst));
+	skb_dst_set(skb, dst_clone(&rt->dst));
 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	if (proto == IPPROTO_ICMPV6) {
 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 8f39893d8081..0fd027f3f47e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -552,7 +552,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl))
 		goto out;
 
-	skb2->dev = rt->u.dst.dev;
+	skb2->dev = rt->dst.dev;
 
 	/* route "incoming" packet */
 	if (rt->rt_flags & RTCF_LOCAL) {
@@ -562,7 +562,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		fl.fl4_src = eiph->saddr;
 		fl.fl4_tos = eiph->tos;
 		if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
-		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
+		    rt->dst.dev->type != ARPHRD_TUNNEL) {
 			ip_rt_put(rt);
 			goto out;
 		}
@@ -626,7 +626,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		icmpv6_send(skb2, rel_type, rel_code, rel_info);
 
 		if (rt)
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 
 		kfree_skb(skb2);
 	}
@@ -1135,7 +1135,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 			if (dev->mtu < IPV6_MIN_MTU)
 				dev->mtu = IPV6_MIN_MTU;
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	}
 }
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 3e36d1538b6e..d1444b95ad7e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -158,7 +158,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		rt = rt6_lookup(net, addr, NULL, 0, 0);
 		if (rt) {
 			dev = rt->rt6i_dev;
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 		}
 	} else
 		dev = dev_get_by_index_rcu(net, ifindex);
@@ -248,7 +248,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
 		if (rt) {
 			dev = rt->rt6i_dev;
 			dev_hold(dev);
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 		}
 	} else
 		dev = dev_get_by_index_rcu(net, ifindex);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0abdc242ddb7..1fc46fc60efd 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1229,7 +1229,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			ND_PRINTK0(KERN_ERR
 				   "ICMPv6 RA: %s() got default router without neighbour.\n",
 				   __func__);
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			in6_dev_put(in6_dev);
 			return;
 		}
@@ -1244,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	if (ra_msg->icmph.icmp6_hop_limit) {
 		in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
 		if (rt)
-			rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
+			rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
 	}
 
 skip_defrtr:
@@ -1363,7 +1363,7 @@ skip_linkparms:
 			in6_dev->cnf.mtu6 = mtu;
 
 			if (rt)
-				rt->u.dst.metrics[RTAX_MTU-1] = mtu;
+				rt->dst.metrics[RTAX_MTU-1] = mtu;
 
 			rt6_mtu_change(skb->dev, mtu);
 		}
@@ -1384,7 +1384,7 @@ skip_linkparms:
 	}
 out:
 	if (rt)
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	else if (neigh)
 		neigh_release(neigh);
 	in6_dev_put(in6_dev);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 968b96490729..e677937a07fc 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -611,23 +611,23 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 	int err;
 	struct rt6_info *rt = (struct rt6_info *)*dstp;
 
-	if (length > rt->u.dst.dev->mtu) {
-		ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
+	if (length > rt->dst.dev->mtu) {
+		ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu);
 		return -EMSGSIZE;
 	}
 	if (flags&MSG_PROBE)
 		goto out;
 
 	skb = sock_alloc_send_skb(sk,
-				  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+				  length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
 				  flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto error;
-	skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
+	skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	*dstp = NULL;
 
 	skb_put(skb, length);
@@ -643,7 +643,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 
 	IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
-		      rt->u.dst.dev, dst_output);
+		      rt->dst.dev, dst_output);
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 252d76199c41..f7702850d45c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -126,16 +126,14 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 };
 
 static struct rt6_info ip6_null_entry_template = {
-	.u = {
-		.dst = {
-			.__refcnt	= ATOMIC_INIT(1),
-			.__use		= 1,
-			.obsolete	= -1,
-			.error		= -ENETUNREACH,
-			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
-			.input		= ip6_pkt_discard,
-			.output		= ip6_pkt_discard_out,
-		}
+	.dst = {
+		.__refcnt	= ATOMIC_INIT(1),
+		.__use		= 1,
+		.obsolete	= -1,
+		.error		= -ENETUNREACH,
+		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+		.input		= ip6_pkt_discard,
+		.output		= ip6_pkt_discard_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol  = RTPROT_KERNEL,
@@ -149,16 +147,14 @@ static int ip6_pkt_prohibit(struct sk_buff *skb);
 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 
 static struct rt6_info ip6_prohibit_entry_template = {
-	.u = {
-		.dst = {
-			.__refcnt	= ATOMIC_INIT(1),
-			.__use		= 1,
-			.obsolete	= -1,
-			.error		= -EACCES,
-			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
-			.input		= ip6_pkt_prohibit,
-			.output		= ip6_pkt_prohibit_out,
-		}
+	.dst = {
+		.__refcnt	= ATOMIC_INIT(1),
+		.__use		= 1,
+		.obsolete	= -1,
+		.error		= -EACCES,
+		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+		.input		= ip6_pkt_prohibit,
+		.output		= ip6_pkt_prohibit_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol  = RTPROT_KERNEL,
@@ -167,16 +163,14 @@ static struct rt6_info ip6_prohibit_entry_template = {
 };
 
 static struct rt6_info ip6_blk_hole_entry_template = {
-	.u = {
-		.dst = {
-			.__refcnt	= ATOMIC_INIT(1),
-			.__use		= 1,
-			.obsolete	= -1,
-			.error		= -EINVAL,
-			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
-			.input		= dst_discard,
-			.output		= dst_discard,
-		}
+	.dst = {
+		.__refcnt	= ATOMIC_INIT(1),
+		.__use		= 1,
+		.obsolete	= -1,
+		.error		= -EINVAL,
+		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+		.input		= dst_discard,
+		.output		= dst_discard,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol  = RTPROT_KERNEL,
@@ -249,7 +243,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 	if (!oif && ipv6_addr_any(saddr))
 		goto out;
 
-	for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
+	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 		struct net_device *dev = sprt->rt6i_dev;
 
 		if (oif) {
@@ -407,10 +401,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 
 	match = NULL;
 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
-	     rt = rt->u.dst.rt6_next)
+	     rt = rt->dst.rt6_next)
 		match = find_match(rt, oif, strict, &mpri, match);
 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
-	     rt = rt->u.dst.rt6_next)
+	     rt = rt->dst.rt6_next)
 		match = find_match(rt, oif, strict, &mpri, match);
 
 	return match;
@@ -432,7 +426,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 
 	if (!match &&
 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
-		struct rt6_info *next = rt0->u.dst.rt6_next;
+		struct rt6_info *next = rt0->dst.rt6_next;
 
 		/* no entries matched; do round-robin */
 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@ -517,7 +511,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 			rt->rt6i_expires = jiffies + HZ * lifetime;
 			rt->rt6i_flags |= RTF_EXPIRES;
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	}
 	return 0;
 }
@@ -555,7 +549,7 @@ restart:
 	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
 	BACKTRACK(net, &fl->fl6_src);
 out:
-	dst_use(&rt->u.dst, jiffies);
+	dst_use(&rt->dst, jiffies);
 	read_unlock_bh(&table->tb6_lock);
 	return rt;
 
@@ -643,7 +637,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
-		rt->u.dst.flags |= DST_HOST;
+		rt->dst.flags |= DST_HOST;
 
 #ifdef CONFIG_IPV6_SUBTREES
 		if (rt->rt6i_src.plen && saddr) {
@@ -677,7 +671,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "Neighbour table overflow.\n");
-			dst_free(&rt->u.dst);
+			dst_free(&rt->dst);
 			return NULL;
 		}
 		rt->rt6i_nexthop = neigh;
@@ -694,7 +688,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
-		rt->u.dst.flags |= DST_HOST;
+		rt->dst.flags |= DST_HOST;
 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
 	}
 	return rt;
@@ -726,7 +720,7 @@ restart:
 	    rt->rt6i_flags & RTF_CACHE)
 		goto out;
 
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
@@ -739,10 +733,10 @@ restart:
 #endif
 	}
 
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	rt = nrt ? : net->ipv6.ip6_null_entry;
 
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 	if (nrt) {
 		err = ip6_ins_rt(nrt);
 		if (!err)
@@ -756,7 +750,7 @@ restart:
 	 * Race condition! In the gap, when table->tb6_lock was
 	 * released someone could insert this route.  Relookup.
 	 */
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	goto relookup;
 
 out:
@@ -764,11 +758,11 @@ out:
 		reachable = 0;
 		goto restart_2;
 	}
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
 out2:
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.__use++;
+	rt->dst.lastuse = jiffies;
+	rt->dst.__use++;
 
 	return rt;
 }
@@ -835,15 +829,15 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
 	struct dst_entry *new = NULL;
 
 	if (rt) {
-		new = &rt->u.dst;
+		new = &rt->dst;
 
 		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
 
-		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
-		new->dev = ort->u.dst.dev;
+		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+		new->dev = ort->dst.dev;
 		if (new->dev)
 			dev_hold(new->dev);
 		rt->rt6i_idev = ort->rt6i_idev;
@@ -912,7 +906,7 @@ static void ip6_link_failure(struct sk_buff *skb)
 	rt = (struct rt6_info *) skb_dst(skb);
 	if (rt) {
 		if (rt->rt6i_flags&RTF_CACHE) {
-			dst_set_expires(&rt->u.dst, 0);
+			dst_set_expires(&rt->dst, 0);
 			rt->rt6i_flags |= RTF_EXPIRES;
 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
 			rt->rt6i_node->fn_sernum = -1;
@@ -986,14 +980,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	rt->rt6i_dev	  = dev;
 	rt->rt6i_idev     = idev;
 	rt->rt6i_nexthop  = neigh;
-	atomic_set(&rt->u.dst.__refcnt, 1);
-	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
-	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-	rt->u.dst.output  = ip6_output;
+	atomic_set(&rt->dst.__refcnt, 1);
+	rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
+	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	rt->dst.output  = ip6_output;
 
 #if 0	/* there's no chance to use these for ndisc */
-	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
+	rt->dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
 				? DST_HOST
 				: 0;
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
@@ -1001,14 +995,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 #endif
 
 	spin_lock_bh(&icmp6_dst_lock);
-	rt->u.dst.next = icmp6_dst_gc_list;
-	icmp6_dst_gc_list = &rt->u.dst;
+	rt->dst.next = icmp6_dst_gc_list;
+	icmp6_dst_gc_list = &rt->dst;
 	spin_unlock_bh(&icmp6_dst_lock);
 
 	fib6_force_start_gc(net);
 
 out:
-	return &rt->u.dst;
+	return &rt->dst;
 }
 
 int icmp6_dst_gc(void)
@@ -1159,7 +1153,7 @@ int ip6_route_add(struct fib6_config *cfg)
 		goto out;
 	}
 
-	rt->u.dst.obsolete = -1;
+	rt->dst.obsolete = -1;
 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
 				0;
@@ -1171,16 +1165,16 @@ int ip6_route_add(struct fib6_config *cfg)
 	addr_type = ipv6_addr_type(&cfg->fc_dst);
 
 	if (addr_type & IPV6_ADDR_MULTICAST)
-		rt->u.dst.input = ip6_mc_input;
+		rt->dst.input = ip6_mc_input;
 	else
-		rt->u.dst.input = ip6_forward;
+		rt->dst.input = ip6_forward;
 
-	rt->u.dst.output = ip6_output;
+	rt->dst.output = ip6_output;
 
 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
 	rt->rt6i_dst.plen = cfg->fc_dst_len;
 	if (rt->rt6i_dst.plen == 128)
-	       rt->u.dst.flags = DST_HOST;
+	       rt->dst.flags = DST_HOST;
 
 #ifdef CONFIG_IPV6_SUBTREES
 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@@ -1208,9 +1202,9 @@ int ip6_route_add(struct fib6_config *cfg)
 				goto out;
 			}
 		}
-		rt->u.dst.output = ip6_pkt_discard_out;
-		rt->u.dst.input = ip6_pkt_discard;
-		rt->u.dst.error = -ENETUNREACH;
+		rt->dst.output = ip6_pkt_discard_out;
+		rt->dst.input = ip6_pkt_discard;
+		rt->dst.error = -ENETUNREACH;
 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
 		goto install_route;
 	}
@@ -1244,7 +1238,7 @@ int ip6_route_add(struct fib6_config *cfg)
 				goto out;
 			if (dev) {
 				if (dev != grt->rt6i_dev) {
-					dst_release(&grt->u.dst);
+					dst_release(&grt->dst);
 					goto out;
 				}
 			} else {
@@ -1255,7 +1249,7 @@ int ip6_route_add(struct fib6_config *cfg)
 			}
 			if (!(grt->rt6i_flags&RTF_GATEWAY))
 				err = 0;
-			dst_release(&grt->u.dst);
+			dst_release(&grt->dst);
 
 			if (err)
 				goto out;
@@ -1294,18 +1288,18 @@ install_route:
 					goto out;
 				}
 
-				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
+				rt->dst.metrics[type - 1] = nla_get_u32(nla);
 			}
 		}
 	}
 
-	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
-		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
-	if (!dst_mtu(&rt->u.dst))
-		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
-	if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-	rt->u.dst.dev = dev;
+	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
+		rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+	if (!dst_mtu(&rt->dst))
+		rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
+	if (!dst_metric(&rt->dst, RTAX_ADVMSS))
+		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	rt->dst.dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
 
@@ -1319,7 +1313,7 @@ out:
 	if (idev)
 		in6_dev_put(idev);
 	if (rt)
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 	return err;
 }
 
@@ -1336,7 +1330,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 	write_lock_bh(&table->tb6_lock);
 
 	err = fib6_del(rt, info);
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 
 	write_unlock_bh(&table->tb6_lock);
 
@@ -1369,7 +1363,7 @@ static int ip6_route_del(struct fib6_config *cfg)
 			 &cfg->fc_src, cfg->fc_src_len);
 
 	if (fn) {
-		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
+		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 			if (cfg->fc_ifindex &&
 			    (rt->rt6i_dev == NULL ||
 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
@@ -1379,7 +1373,7 @@ static int ip6_route_del(struct fib6_config *cfg)
 				continue;
 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
 				continue;
-			dst_hold(&rt->u.dst);
+			dst_hold(&rt->dst);
 			read_unlock_bh(&table->tb6_lock);
 
 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
@@ -1421,7 +1415,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	read_lock_bh(&table->tb6_lock);
 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 restart:
-	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 		/*
 		 * Current route is on-link; redirect is always invalid.
 		 *
@@ -1445,7 +1439,7 @@ restart:
 		rt = net->ipv6.ip6_null_entry;
 	BACKTRACK(net, &fl->fl6_src);
 out:
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 
 	read_unlock_bh(&table->tb6_lock);
 
@@ -1513,10 +1507,10 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 	 * Look, redirects are sent only in response to data packets,
 	 * so that this nexthop apparently is reachable. --ANK
 	 */
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);
 
 	/* Duplicate redirect: silently ignore. */
-	if (neigh == rt->u.dst.neighbour)
+	if (neigh == rt->dst.neighbour)
 		goto out;
 
 	nrt = ip6_rt_copy(rt);
@@ -1529,20 +1523,20 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 
 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
 	nrt->rt6i_dst.plen = 128;
-	nrt->u.dst.flags |= DST_HOST;
+	nrt->dst.flags |= DST_HOST;
 
 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
 	nrt->rt6i_nexthop = neigh_clone(neigh);
 	/* Reset pmtu, it may be better */
-	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
-	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
-							dst_mtu(&nrt->u.dst));
+	nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
+	nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
+							dst_mtu(&nrt->dst));
 
 	if (ip6_ins_rt(nrt))
 		goto out;
 
-	netevent.old = &rt->u.dst;
-	netevent.new = &nrt->u.dst;
+	netevent.old = &rt->dst;
+	netevent.new = &nrt->dst;
 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
 	if (rt->rt6i_flags&RTF_CACHE) {
@@ -1551,7 +1545,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 	}
 
 out:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 }
 
 /*
@@ -1570,7 +1564,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	if (rt == NULL)
 		return;
 
-	if (pmtu >= dst_mtu(&rt->u.dst))
+	if (pmtu >= dst_mtu(&rt->dst))
 		goto out;
 
 	if (pmtu < IPV6_MIN_MTU) {
@@ -1588,7 +1582,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	   They are sent only in response to data packets,
 	   so that this nexthop apparently is reachable. --ANK
 	 */
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);
 
 	/* Host route. If it is static, it would be better
 	   not to override it, but add new one, so that
@@ -1596,10 +1590,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	   would return automatically.
 	 */
 	if (rt->rt6i_flags & RTF_CACHE) {
-		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+		rt->dst.metrics[RTAX_MTU-1] = pmtu;
 		if (allfrag)
-			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
-		dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
+			rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
 		goto out;
 	}
@@ -1615,9 +1609,9 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		nrt = rt6_alloc_clone(rt, daddr);
 
 	if (nrt) {
-		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+		nrt->dst.metrics[RTAX_MTU-1] = pmtu;
 		if (allfrag)
-			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+			nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
 
 		/* According to RFC 1981, detecting PMTU increase shouldn't be
 		 * happened within 5 mins, the recommended timer is 10 mins.
@@ -1625,13 +1619,13 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
 		 * and detecting PMTU increase will be automatically happened.
 		 */
-		dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
+		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
 
 		ip6_ins_rt(nrt);
 	}
 out:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 }
 
 /*
@@ -1644,18 +1638,18 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
 
 	if (rt) {
-		rt->u.dst.input = ort->u.dst.input;
-		rt->u.dst.output = ort->u.dst.output;
-
-		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
-		rt->u.dst.error = ort->u.dst.error;
-		rt->u.dst.dev = ort->u.dst.dev;
-		if (rt->u.dst.dev)
-			dev_hold(rt->u.dst.dev);
+		rt->dst.input = ort->dst.input;
+		rt->dst.output = ort->dst.output;
+
+		memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+		rt->dst.error = ort->dst.error;
+		rt->dst.dev = ort->dst.dev;
+		if (rt->dst.dev)
+			dev_hold(rt->dst.dev);
 		rt->rt6i_idev = ort->rt6i_idev;
 		if (rt->rt6i_idev)
 			in6_dev_hold(rt->rt6i_idev);
-		rt->u.dst.lastuse = jiffies;
+		rt->dst.lastuse = jiffies;
 		rt->rt6i_expires = 0;
 
 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
@@ -1689,14 +1683,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 	if (!fn)
 		goto out;
 
-	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 		if (rt->rt6i_dev->ifindex != ifindex)
 			continue;
 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
 			continue;
 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
 			continue;
-		dst_hold(&rt->u.dst);
+		dst_hold(&rt->dst);
 		break;
 	}
 out:
@@ -1744,14 +1738,14 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
 		return NULL;
 
 	write_lock_bh(&table->tb6_lock);
-	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
+	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
 		if (dev == rt->rt6i_dev &&
 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
 			break;
 	}
 	if (rt)
-		dst_hold(&rt->u.dst);
+		dst_hold(&rt->dst);
 	write_unlock_bh(&table->tb6_lock);
 	return rt;
 }
@@ -1790,9 +1784,9 @@ void rt6_purge_dflt_routers(struct net *net)
 
 restart:
 	read_lock_bh(&table->tb6_lock);
-	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
-			dst_hold(&rt->u.dst);
+			dst_hold(&rt->dst);
 			read_unlock_bh(&table->tb6_lock);
 			ip6_del_rt(rt);
 			goto restart;
@@ -1930,15 +1924,15 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	dev_hold(net->loopback_dev);
 	in6_dev_hold(idev);
 
-	rt->u.dst.flags = DST_HOST;
-	rt->u.dst.input = ip6_input;
-	rt->u.dst.output = ip6_output;
+	rt->dst.flags = DST_HOST;
+	rt->dst.input = ip6_input;
+	rt->dst.output = ip6_output;
 	rt->rt6i_dev = net->loopback_dev;
 	rt->rt6i_idev = idev;
-	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
-	rt->u.dst.obsolete = -1;
+	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+	rt->dst.obsolete = -1;
 
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
 	if (anycast)
@@ -1947,7 +1941,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 		rt->rt6i_flags |= RTF_LOCAL;
 	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 	if (IS_ERR(neigh)) {
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 
 		/* We are casting this because that is the return
 		 * value type.  But an errno encoded pointer is the
@@ -1962,7 +1956,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->rt6i_dst.plen = 128;
 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
 
-	atomic_set(&rt->u.dst.__refcnt, 1);
+	atomic_set(&rt->dst.__refcnt, 1);
 
 	return rt;
 }
@@ -2033,12 +2027,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	   PMTU discouvery.
 	 */
 	if (rt->rt6i_dev == arg->dev &&
-	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
-	    (dst_mtu(&rt->u.dst) >= arg->mtu ||
-	     (dst_mtu(&rt->u.dst) < arg->mtu &&
-	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
-		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
+	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
+	    (dst_mtu(&rt->dst) >= arg->mtu ||
+	     (dst_mtu(&rt->dst) < arg->mtu &&
+	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
+		rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
+		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
 	}
 	return 0;
 }
@@ -2252,20 +2246,20 @@ static int rt6_fill_node(struct net *net,
 #endif
 			NLA_PUT_U32(skb, RTA_IIF, iif);
 	} else if (dst) {
-		struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
+		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
 		struct in6_addr saddr_buf;
 		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
 				       dst, 0, &saddr_buf) == 0)
 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
 
-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto nla_put_failure;
 
-	if (rt->u.dst.neighbour)
-		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+	if (rt->dst.neighbour)
+		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
 
-	if (rt->u.dst.dev)
+	if (rt->dst.dev)
 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
 
 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
@@ -2277,8 +2271,8 @@ static int rt6_fill_node(struct net *net,
 	else
 		expires = INT_MAX;
 
-	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
-			       expires, rt->u.dst.error) < 0)
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
+			       expires, rt->dst.error) < 0)
 		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
@@ -2364,7 +2358,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
@@ -2416,12 +2410,12 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 	struct net *net = dev_net(dev);
 
 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
-		net->ipv6.ip6_null_entry->u.dst.dev = dev;
+		net->ipv6.ip6_null_entry->dst.dev = dev;
 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-		net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
+		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
-		net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
+		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
 #endif
 	}
@@ -2464,8 +2458,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
 		seq_puts(m, "00000000000000000000000000000000");
 	}
 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
-		   rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
-		   rt->u.dst.__use, rt->rt6i_flags,
+		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
+		   rt->dst.__use, rt->rt6i_flags,
 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
 	return 0;
 }
@@ -2646,9 +2640,9 @@ static int __net_init ip6_route_net_init(struct net *net)
 					   GFP_KERNEL);
 	if (!net->ipv6.ip6_null_entry)
 		goto out_ip6_dst_ops;
-	net->ipv6.ip6_null_entry->u.dst.path =
+	net->ipv6.ip6_null_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_null_entry;
-	net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
+	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2656,18 +2650,18 @@ static int __net_init ip6_route_net_init(struct net *net)
 					       GFP_KERNEL);
 	if (!net->ipv6.ip6_prohibit_entry)
 		goto out_ip6_null_entry;
-	net->ipv6.ip6_prohibit_entry->u.dst.path =
+	net->ipv6.ip6_prohibit_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
-	net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
+	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 
 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
 					       GFP_KERNEL);
 	if (!net->ipv6.ip6_blk_hole_entry)
 		goto out_ip6_prohibit_entry;
-	net->ipv6.ip6_blk_hole_entry->u.dst.path =
+	net->ipv6.ip6_blk_hole_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
-	net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
+	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 #endif
 
 	net->ipv6.sysctl.flush_delay = 0;
@@ -2742,12 +2736,12 @@ int __init ip6_route_init(void)
 	/* Registering of the loopback is done before this portion of code,
 	 * the loopback reference in rt6_info will not be taken, do it
 	 * manually for init_net */
-	init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-	init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
-	init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
   #endif
 	ret = fib6_init();
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 702c532ec21e..4699cd3c3118 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -712,7 +712,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		stats->tx_carrier_errors++;
 		goto tx_error_icmp;
 	}
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
@@ -721,7 +721,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	}
 
 	if (df) {
-		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 
 		if (mtu < 68) {
 			stats->collisions++;
@@ -780,7 +780,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags = 0;
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -829,7 +829,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
 				    .proto = IPPROTO_IPV6 };
 		struct rtable *rt;
 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tdev = rt->u.dst.dev;
+			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
 		dev->flags |= IFF_POINTOPOINT;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 0852512d392c..226a0ae3bcfd 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -348,7 +348,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	sk->sk_state = TCP_ESTABLISHED;
 	inet->inet_id = jiffies;
 
-	sk_dst_set(sk, &rt->u.dst);
+	sk_dst_set(sk, &rt->dst);
 
 	write_lock_bh(&l2tp_ip_lock);
 	hlist_del_init(&sk->sk_bind_node);
@@ -496,9 +496,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
 				goto no_route;
 		}
-		sk_setup_caps(sk, &rt->u.dst);
+		sk_setup_caps(sk, &rt->dst);
 	}
-	skb_dst_set(skb, dst_clone(&rt->u.dst));
+	skb_dst_set(skb, dst_clone(&rt->dst));
 
 	/* Queue the packet to IP for output */
 	rc = ip_queue_xmit(skb);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 93c15a107b2c..02b078e11cf3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -90,10 +90,10 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 					     &dest->addr.ip);
 				return NULL;
 			}
-			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
+			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst));
 			IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
 				  &dest->addr.ip,
-				  atomic_read(&rt->u.dst.__refcnt), rtos);
+				  atomic_read(&rt->dst.__refcnt), rtos);
 		}
 		spin_unlock(&dest->dst_lock);
 	} else {
@@ -148,10 +148,10 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
 					     &dest->addr.in6);
 				return NULL;
 			}
-			__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
+			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst));
 			IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
 				  &dest->addr.in6,
-				  atomic_read(&rt->u.dst.__refcnt));
+				  atomic_read(&rt->dst.__refcnt));
 		}
 		spin_unlock(&dest->dst_lock);
 	} else {
@@ -198,7 +198,7 @@ do {							\
 	(skb)->ipvs_property = 1;			\
 	skb_forward_csum(skb);				\
 	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
-		(rt)->u.dst.dev, dst_output);		\
+		(rt)->dst.dev, dst_output);		\
 } while (0)
 
 
@@ -245,7 +245,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -265,7 +265,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -309,9 +309,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
@@ -323,13 +323,13 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	 */
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(skb == NULL)) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		return NF_STOLEN;
 	}
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -376,7 +376,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -388,12 +388,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, sizeof(struct iphdr)))
 		goto tx_error_put;
 
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -452,9 +452,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 				 "ip_vs_nat_xmit_v6(): frag needed for");
@@ -465,12 +465,12 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
 		goto tx_error_put;
 
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -498,7 +498,7 @@ tx_error:
 	kfree_skb(skb);
 	return NF_STOLEN;
 tx_error_put:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	goto tx_error;
 }
 #endif
@@ -549,9 +549,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
 		goto tx_error_icmp;
 
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
-	mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+	mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 	if (mtu < 68) {
 		ip_rt_put(rt);
 		IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
@@ -601,7 +601,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -615,7 +615,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->daddr		=	rt->rt_dst;
 	iph->saddr		=	rt->rt_src;
 	iph->ttl		=	old_iph->ttl;
-	ip_select_ident(iph, &rt->u.dst, NULL);
+	ip_select_ident(iph, &rt->dst, NULL);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -660,12 +660,12 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!rt)
 		goto tx_error_icmp;
 
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
-	mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
+	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
 	/* TODO IPv6: do we need this check in IPv6? */
 	if (mtu < 1280) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__);
 		goto tx_error;
 	}
@@ -674,7 +674,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
@@ -689,7 +689,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		struct sk_buff *new_skb =
 			skb_realloc_headroom(skb, max_headroom);
 		if (!new_skb) {
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			kfree_skb(skb);
 			IP_VS_ERR_RL("%s(): no memory\n", __func__);
 			return NF_STOLEN;
@@ -707,7 +707,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -760,7 +760,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		ip_rt_put(rt);
@@ -780,7 +780,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -813,10 +813,10 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
@@ -827,13 +827,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	 */
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(skb == NULL)) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		return NF_STOLEN;
 	}
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -888,7 +888,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
@@ -900,12 +900,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, offset))
 		goto tx_error_put;
 
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
 	/* drop the old route when skb is not shared */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	ip_vs_nat_icmp(skb, pp, cp, 0);
 
@@ -963,9 +963,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
@@ -975,12 +975,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, offset))
 		goto tx_error_put;
 
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
 	/* drop the old route when skb is not shared */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	ip_vs_nat_icmp_v6(skb, pp, cp, 0);
 
@@ -1001,7 +1001,7 @@ out:
 	LeaveFunction(10);
 	return rc;
 tx_error_put:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	goto tx_error;
 }
 #endif
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 6eaee7c8a337..b969025cf82f 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -734,11 +734,11 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 		if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
 			if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
 				if (rt1->rt_gateway == rt2->rt_gateway &&
-				    rt1->u.dst.dev  == rt2->u.dst.dev)
+				    rt1->dst.dev  == rt2->dst.dev)
 					ret = 1;
-				dst_release(&rt2->u.dst);
+				dst_release(&rt2->dst);
 			}
-			dst_release(&rt1->u.dst);
+			dst_release(&rt1->dst);
 		}
 		break;
 	}
@@ -753,11 +753,11 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 			if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
 				if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
 					    sizeof(rt1->rt6i_gateway)) &&
-				    rt1->u.dst.dev == rt2->u.dst.dev)
+				    rt1->dst.dev == rt2->dst.dev)
 					ret = 1;
-				dst_release(&rt2->u.dst);
+				dst_release(&rt2->dst);
 			}
-			dst_release(&rt1->u.dst);
+			dst_release(&rt1->dst);
 		}
 		break;
 	}
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 497b2224536f..aadde018a072 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -61,7 +61,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
 		goto out;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	in_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (in_dev != NULL) {
 		for_primary_ifa(in_dev) {
 			if (ifa->ifa_broadcast == iph->daddr) {
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 62ec021fbd50..1841388c770a 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -165,8 +165,8 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
 	rcu_read_unlock();
 
 	if (rt != NULL) {
-		mtu = dst_mtu(&rt->u.dst);
-		dst_release(&rt->u.dst);
+		mtu = dst_mtu(&rt->dst);
+		dst_release(&rt->dst);
 	}
 	return mtu;
 }
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 859d9fd429c8..c77a85bbd9eb 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -77,8 +77,8 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 		return false;
 
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
-	skb->dev      = rt->u.dst.dev;
+	skb_dst_set(skb, &rt->dst);
+	skb->dev      = rt->dst.dev;
 	skb->protocol = htons(ETH_P_IP);
 	return true;
 }
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index f0f85b0123f7..9f1729bd60de 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -64,8 +64,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 		return;
 	}
 
-	peer->if_mtu = dst_mtu(&rt->u.dst);
-	dst_release(&rt->u.dst);
+	peer->if_mtu = dst_mtu(&rt->dst);
+	dst_release(&rt->dst);
 
 	_leave(" [if_mtu %u]", peer->if_mtu);
 }
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 182749867c72..a0e1a7fdebbf 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -490,7 +490,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 			  __func__, &fl.fl4_dst, &fl.fl4_src);
 
 	if (!ip_route_output_key(&init_net, &rt, &fl)) {
-		dst = &rt->u.dst;
+		dst = &rt->dst;
 	}
 
 	/* If there is no association or if a source address is passed, no
@@ -534,7 +534,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 			fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
 			fl.fl_ip_sport = laddr->a.v4.sin_port;
 			if (!ip_route_output_key(&init_net, &rt, &fl)) {
-				dst = &rt->u.dst;
+				dst = &rt->dst;
 				goto out_unlock;
 			}
 		}
-- 
cgit v1.2.3-55-g7522


From c7de2cf053420d63bac85133469c965d4b1083e1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 9 Jun 2010 02:09:23 +0000
Subject: pkt_sched: gen_kill_estimator() rcu fixes

gen_kill_estimator() API is incomplete or not well documented, since
caller should make sure an RCU grace period is respected before
freeing stats_lock.

This was partially addressed in commit 5d944c640b4
(gen_estimator: deadlock fix), but same problem exist for all
gen_kill_estimator() users, if lock they use is not already RCU
protected.

A code review shows xt_RATEEST.c, act_api.c, act_police.c have this
problem. Other are ok because they use qdisc lock, already RCU
protected.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h              |  2 ++
 include/net/netfilter/xt_rateest.h |  1 +
 net/core/gen_estimator.c           |  1 +
 net/netfilter/xt_RATEEST.c         | 12 +++++++++++-
 net/sched/act_api.c                | 11 ++++++++++-
 net/sched/act_police.c             | 12 +++++++++++-
 6 files changed, 36 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index c05fd717c588..bab385f13ac3 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -20,6 +20,7 @@ struct tcf_common {
 	struct gnet_stats_queue		tcfc_qstats;
 	struct gnet_stats_rate_est	tcfc_rate_est;
 	spinlock_t			tcfc_lock;
+	struct rcu_head			tcfc_rcu;
 };
 #define tcf_next	common.tcfc_next
 #define tcf_index	common.tcfc_index
@@ -32,6 +33,7 @@ struct tcf_common {
 #define tcf_qstats	common.tcfc_qstats
 #define tcf_rate_est	common.tcfc_rate_est
 #define tcf_lock	common.tcfc_lock
+#define tcf_rcu		common.tcfc_rcu
 
 struct tcf_police {
 	struct tcf_common	common;
diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index ddbf37e19616..5e1427795928 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -9,6 +9,7 @@ struct xt_rateest {
 	struct gnet_estimator		params;
 	struct gnet_stats_rate_est	rstats;
 	struct gnet_stats_basic_packed	bstats;
+	struct rcu_head			rcu;
 };
 
 extern struct xt_rateest *xt_rateest_lookup(const char *name);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 785e5276a300..9fbe7f7429b0 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -263,6 +263,7 @@ static void __gen_kill_estimator(struct rcu_head *head)
  *
  * Removes the rate estimator specified by &bstats and &rate_est.
  *
+ * Note : Caller should respect an RCU grace period before freeing stats_lock
  */
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 			struct gnet_stats_rate_est *rate_est)
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 69c01e10f8af..de079abd5bc8 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -60,13 +60,22 @@ struct xt_rateest *xt_rateest_lookup(const char *name)
 }
 EXPORT_SYMBOL_GPL(xt_rateest_lookup);
 
+static void xt_rateest_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct xt_rateest, rcu));
+}
+
 void xt_rateest_put(struct xt_rateest *est)
 {
 	mutex_lock(&xt_rateest_mutex);
 	if (--est->refcnt == 0) {
 		hlist_del(&est->list);
 		gen_kill_estimator(&est->bstats, &est->rstats);
-		kfree(est);
+		/*
+		 * gen_estimator est_timer() might access est->lock or bstats,
+		 * wait a RCU grace period before freeing 'est'
+		 */
+		call_rcu(&est->rcu, xt_rateest_free_rcu);
 	}
 	mutex_unlock(&xt_rateest_mutex);
 }
@@ -179,6 +188,7 @@ static int __init xt_rateest_tg_init(void)
 static void __exit xt_rateest_tg_fini(void)
 {
 	xt_unregister_target(&xt_rateest_tg_reg);
+	rcu_barrier(); /* Wait for completion of call_rcu()'s (xt_rateest_free_rcu) */
 }
 
 
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 972378f47f3c..23b25f89e7e0 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -26,6 +26,11 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 
+static void tcf_common_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct tcf_common, tcfc_rcu));
+}
+
 void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 {
 	unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
@@ -38,7 +43,11 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 			write_unlock_bh(hinfo->lock);
 			gen_kill_estimator(&p->tcfc_bstats,
 					   &p->tcfc_rate_est);
-			kfree(p);
+			/*
+			 * gen_estimator est_timer() might access p->tcfc_lock
+			 * or bstats, wait a RCU grace period before freeing p
+			 */
+			call_rcu(&p->tcfc_rcu, tcf_common_free_rcu);
 			return;
 		}
 	}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 654f73dff7c1..537a48732e9e 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -97,6 +97,11 @@ nla_put_failure:
 	goto done;
 }
 
+static void tcf_police_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct tcf_police, tcf_rcu));
+}
+
 static void tcf_police_destroy(struct tcf_police *p)
 {
 	unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
@@ -113,7 +118,11 @@ static void tcf_police_destroy(struct tcf_police *p)
 				qdisc_put_rtab(p->tcfp_R_tab);
 			if (p->tcfp_P_tab)
 				qdisc_put_rtab(p->tcfp_P_tab);
-			kfree(p);
+			/*
+			 * gen_estimator est_timer() might access p->tcf_lock
+			 * or bstats, wait a RCU grace period before freeing p
+			 */
+			call_rcu(&p->tcf_rcu, tcf_police_free_rcu);
 			return;
 		}
 	}
@@ -397,6 +406,7 @@ static void __exit
 police_cleanup_module(void)
 {
 	tcf_unregister_action(&act_police_ops);
+	rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
 }
 
 module_init(police_init_module);
-- 
cgit v1.2.3-55-g7522


From a3433f35a55f7604742cae620c6dc6edfc70db6a Mon Sep 17 00:00:00 2001
From: Changli Gao
Date: Sat, 12 Jun 2010 14:01:43 +0000
Subject: tcp: unify tcp flag macros

unify tcp flag macros: TCPHDR_FIN, TCPHDR_SYN, TCPHDR_RST, TCPHDR_PSH,
TCPHDR_ACK, TCPHDR_URG, TCPHDR_ECE and TCPHDR_CWR. TCBCB_FLAG_* are replaced
with the corresponding TCPHDR_*.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/tcp.h                      |   24 ++++++-------
 net/ipv4/tcp.c                         |    8 ++--
 net/ipv4/tcp_input.c                   |    2 -
 net/ipv4/tcp_output.c                  |   59 ++++++++++++++++-----------------
 net/netfilter/nf_conntrack_proto_tcp.c |   32 ++++++-----------
 net/netfilter/xt_TCPMSS.c              |    4 --
 6 files changed, 58 insertions(+), 71 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h                      | 24 +++++++-------
 net/ipv4/tcp.c                         |  8 ++---
 net/ipv4/tcp_input.c                   |  2 +-
 net/ipv4/tcp_output.c                  | 59 +++++++++++++++++-----------------
 net/netfilter/nf_conntrack_proto_tcp.c | 32 +++++++-----------
 net/netfilter/xt_TCPMSS.c              |  4 +--
 6 files changed, 58 insertions(+), 71 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 573166484413..9e68e25c8b82 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -602,6 +602,17 @@ extern u32	__tcp_select_window(struct sock *sk);
  */
 #define tcp_time_stamp		((__u32)(jiffies))
 
+#define tcp_flag_byte(th) (((u_int8_t *)th)[13])
+
+#define TCPHDR_FIN 0x01
+#define TCPHDR_SYN 0x02
+#define TCPHDR_RST 0x04
+#define TCPHDR_PSH 0x08
+#define TCPHDR_ACK 0x10
+#define TCPHDR_URG 0x20
+#define TCPHDR_ECE 0x40
+#define TCPHDR_CWR 0x80
+
 /* This is what the send packet queuing engine uses to pass
  * TCP per-packet control information to the transmission
  * code.  We also store the host-order sequence numbers in
@@ -620,19 +631,6 @@ struct tcp_skb_cb {
 	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
 	__u32		when;		/* used to compute rtt's	*/
 	__u8		flags;		/* TCP header flags.		*/
-
-	/* NOTE: These must match up to the flags byte in a
-	 *       real TCP header.
-	 */
-#define TCPCB_FLAG_FIN		0x01
-#define TCPCB_FLAG_SYN		0x02
-#define TCPCB_FLAG_RST		0x04
-#define TCPCB_FLAG_PSH		0x08
-#define TCPCB_FLAG_ACK		0x10
-#define TCPCB_FLAG_URG		0x20
-#define TCPCB_FLAG_ECE		0x40
-#define TCPCB_FLAG_CWR		0x80
-
 	__u8		sacked;		/* State flags for SACK/FACK.	*/
 #define TCPCB_SACKED_ACKED	0x01	/* SKB ACK'd by a SACK block	*/
 #define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 49d0d2b8900c..779d40c3b96e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -511,7 +511,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
 static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+	TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
 	tp->pushed_seq = tp->write_seq;
 }
 
@@ -527,7 +527,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 
 	skb->csum    = 0;
 	tcb->seq     = tcb->end_seq = tp->write_seq;
-	tcb->flags   = TCPCB_FLAG_ACK;
+	tcb->flags   = TCPHDR_ACK;
 	tcb->sacked  = 0;
 	skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
@@ -815,7 +815,7 @@ new_segment:
 		skb_shinfo(skb)->gso_segs = 0;
 
 		if (!copied)
-			TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
+			TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
 
 		copied += copy;
 		poffset += copy;
@@ -1061,7 +1061,7 @@ new_segment:
 			}
 
 			if (!copied)
-				TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
+				TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
 
 			tp->write_seq += copy;
 			TCP_SKB_CB(skb)->end_seq += copy;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 548d575e6cc6..04334661fa28 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3286,7 +3286,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		 * connection startup slow start one packet too
 		 * quickly.  This is severely frowned upon behavior.
 		 */
-		if (!(scb->flags & TCPCB_FLAG_SYN)) {
+		if (!(scb->flags & TCPHDR_SYN)) {
 			flag |= FLAG_DATA_ACKED;
 		} else {
 			flag |= FLAG_SYN_ACKED;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4ed957f201a..51d316dbb058 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -294,9 +294,9 @@ static u16 tcp_select_window(struct sock *sk)
 /* Packet ECN state for a SYN-ACK */
 static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
+	TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR;
 	if (!(tp->ecn_flags & TCP_ECN_OK))
-		TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
+		TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE;
 }
 
 /* Packet ECN state for a SYN.  */
@@ -306,7 +306,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
 
 	tp->ecn_flags = 0;
 	if (sysctl_tcp_ecn == 1) {
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
+		TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR;
 		tp->ecn_flags = TCP_ECN_OK;
 	}
 }
@@ -361,7 +361,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 	skb_shinfo(skb)->gso_type = 0;
 
 	TCP_SKB_CB(skb)->seq = seq;
-	if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN))
+	if (flags & (TCPHDR_SYN | TCPHDR_FIN))
 		seq++;
 	TCP_SKB_CB(skb)->end_seq = seq;
 }
@@ -820,7 +820,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	tcb = TCP_SKB_CB(skb);
 	memset(&opts, 0, sizeof(opts));
 
-	if (unlikely(tcb->flags & TCPCB_FLAG_SYN))
+	if (unlikely(tcb->flags & TCPHDR_SYN))
 		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
 	else
 		tcp_options_size = tcp_established_options(sk, skb, &opts,
@@ -843,7 +843,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
 					tcb->flags);
 
-	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+	if (unlikely(tcb->flags & TCPHDR_SYN)) {
 		/* RFC1323: The window in SYN & SYN/ACK segments
 		 * is never scaled.
 		 */
@@ -866,7 +866,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	}
 
 	tcp_options_write((__be32 *)(th + 1), tp, &opts);
-	if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))
+	if (likely((tcb->flags & TCPHDR_SYN) == 0))
 		TCP_ECN_send(sk, skb, tcp_header_size);
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -880,7 +880,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	icsk->icsk_af_ops->send_check(sk, skb);
 
-	if (likely(tcb->flags & TCPCB_FLAG_ACK))
+	if (likely(tcb->flags & TCPHDR_ACK))
 		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
 
 	if (skb->len != tcp_header_size)
@@ -1023,7 +1023,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
-	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 
@@ -1328,8 +1328,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
 	u32 in_flight, cwnd;
 
 	/* Don't be strict about the congestion window for the final FIN.  */
-	if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
-	    tcp_skb_pcount(skb) == 1)
+	if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1)
 		return 1;
 
 	in_flight = tcp_packets_in_flight(tp);
@@ -1398,7 +1397,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 	 * Nagle can be ignored during F-RTO too (see RFC4138).
 	 */
 	if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
-	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
+	    (TCP_SKB_CB(skb)->flags & TCPHDR_FIN))
 		return 1;
 
 	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1487,7 +1486,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
-	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
 
 	/* This packet was never sent out yet, so no SACK bits. */
@@ -1518,7 +1517,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 send_win, cong_win, limit, in_flight;
 
-	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
+	if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
 		goto send_now;
 
 	if (icsk->icsk_ca_state != TCP_CA_Open)
@@ -1644,7 +1643,7 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
-	TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
+	TCP_SKB_CB(nskb)->flags = TCPHDR_ACK;
 	TCP_SKB_CB(nskb)->sacked = 0;
 	nskb->csum = 0;
 	nskb->ip_summed = skb->ip_summed;
@@ -1669,7 +1668,7 @@ static int tcp_mtu_probe(struct sock *sk)
 			sk_wmem_free_skb(sk, skb);
 		} else {
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
-						   ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+						   ~(TCPHDR_FIN|TCPHDR_PSH);
 			if (!skb_shinfo(skb)->nr_frags) {
 				skb_pull(skb, copy);
 				if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -2020,7 +2019,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 
 	if (!sysctl_tcp_retrans_collapse)
 		return;
-	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)
+	if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN)
 		return;
 
 	tcp_for_write_queue_from_safe(skb, tmp, sk) {
@@ -2112,7 +2111,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 * since it is cheap to do so and saves bytes on the network.
 	 */
 	if (skb->len > 0 &&
-	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
+	    (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) &&
 	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
 		if (!pskb_trim(skb, 0)) {
 			/* Reuse, even though it does some unnecessary work */
@@ -2301,7 +2300,7 @@ void tcp_send_fin(struct sock *sk)
 	mss_now = tcp_current_mss(sk);
 
 	if (tcp_send_head(sk) != NULL) {
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
+		TCP_SKB_CB(skb)->flags |= TCPHDR_FIN;
 		TCP_SKB_CB(skb)->end_seq++;
 		tp->write_seq++;
 	} else {
@@ -2318,7 +2317,7 @@ void tcp_send_fin(struct sock *sk)
 		skb_reserve(skb, MAX_TCP_HEADER);
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
 		tcp_init_nondata_skb(skb, tp->write_seq,
-				     TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
+				     TCPHDR_ACK | TCPHDR_FIN);
 		tcp_queue_skb(sk, skb);
 	}
 	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
@@ -2343,7 +2342,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(skb, MAX_TCP_HEADER);
 	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
-			     TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
+			     TCPHDR_ACK | TCPHDR_RST);
 	/* Send it off. */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2363,11 +2362,11 @@ int tcp_send_synack(struct sock *sk)
 	struct sk_buff *skb;
 
 	skb = tcp_write_queue_head(sk);
-	if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) {
+	if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) {
 		printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
 		return -EFAULT;
 	}
-	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) {
+	if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) {
 		if (skb_cloned(skb)) {
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			if (nskb == NULL)
@@ -2381,7 +2380,7 @@ int tcp_send_synack(struct sock *sk)
 			skb = nskb;
 		}
 
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
+		TCP_SKB_CB(skb)->flags |= TCPHDR_ACK;
 		TCP_ECN_send_synack(tcp_sk(sk), skb);
 	}
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2460,7 +2459,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	 * not even correctly set)
 	 */
 	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
-			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
+			     TCPHDR_SYN | TCPHDR_ACK);
 
 	if (OPTION_COOKIE_EXTENSION & opts.options) {
 		if (s_data_desired) {
@@ -2592,7 +2591,7 @@ int tcp_connect(struct sock *sk)
 	skb_reserve(buff, MAX_TCP_HEADER);
 
 	tp->snd_nxt = tp->write_seq;
-	tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
+	tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
 	TCP_ECN_send_syn(sk, buff);
 
 	/* Send it off. */
@@ -2698,7 +2697,7 @@ void tcp_send_ack(struct sock *sk)
 
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(buff, MAX_TCP_HEADER);
-	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);
+	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
 
 	/* Send it off, this clears delayed acks for us. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2732,7 +2731,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	 * end to send an ack.  Don't queue or clone SKB, just
 	 * send it.
 	 */
-	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK);
+	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
@@ -2762,13 +2761,13 @@ int tcp_write_wakeup(struct sock *sk)
 		if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
 		    skb->len > mss) {
 			seg_size = min(seg_size, mss);
-			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+			TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
 			if (tcp_fragment(sk, skb, seg_size, mss))
 				return -1;
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(sk, skb, mss);
 
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+		TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 		if (!err)
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9dd8cd4fb6e6..802dbffae8b4 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -736,27 +736,19 @@ static bool tcp_in_window(const struct nf_conn *ct,
 	return res;
 }
 
-#define	TH_FIN	0x01
-#define	TH_SYN	0x02
-#define	TH_RST	0x04
-#define	TH_PUSH	0x08
-#define	TH_ACK	0x10
-#define	TH_URG	0x20
-#define	TH_ECE	0x40
-#define	TH_CWR	0x80
-
 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
-static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
+static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
+				 TCPHDR_URG) + 1] =
 {
-	[TH_SYN]			= 1,
-	[TH_SYN|TH_URG]			= 1,
-	[TH_SYN|TH_ACK]			= 1,
-	[TH_RST]			= 1,
-	[TH_RST|TH_ACK]			= 1,
-	[TH_FIN|TH_ACK]			= 1,
-	[TH_FIN|TH_ACK|TH_URG]		= 1,
-	[TH_ACK]			= 1,
-	[TH_ACK|TH_URG]			= 1,
+	[TCPHDR_SYN]				= 1,
+	[TCPHDR_SYN|TCPHDR_URG]			= 1,
+	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
+	[TCPHDR_RST]				= 1,
+	[TCPHDR_RST|TCPHDR_ACK]			= 1,
+	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
+	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
+	[TCPHDR_ACK]				= 1,
+	[TCPHDR_ACK|TCPHDR_URG]			= 1,
 };
 
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
@@ -803,7 +795,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
 	}
 
 	/* Check TCP flags. */
-	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
+	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
 	if (!tcp_valid_flags[tcpflags]) {
 		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 1841388c770a..eb81c380da1b 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -220,15 +220,13 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 }
 #endif
 
-#define TH_SYN 0x02
-
 /* Must specify -p tcp --syn */
 static inline bool find_syn_match(const struct xt_entry_match *m)
 {
 	const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
 
 	if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
-	    tcpinfo->flg_cmp & TH_SYN &&
+	    tcpinfo->flg_cmp & TCPHDR_SYN &&
 	    !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
 		return true;
 
-- 
cgit v1.2.3-55-g7522


From aa1039e73cc2cf834e99c09d2033d5d2675357b9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 15 Jun 2010 08:23:14 +0000
Subject: inetpeer: RCU conversion

inetpeer currently uses an AVL tree protected by an rwlock.

It's possible to make most lookups use RCU

1) Add a struct rcu_head to struct inet_peer

2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().

3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.

4) add an smp_wmb() in link_to_pool() right before node insert.

5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.

6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.

7) inet_getpeer() first attempts lockless lookup.
   Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
   If this attemps fails, lock is taken a regular lookup is performed
again.

8) convert peers.lock from rwlock to a spinlock

9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |   1 +
 net/ipv4/inetpeer.c    | 164 ++++++++++++++++++++++++++++---------------------
 2 files changed, 96 insertions(+), 69 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 87b1df0d4d8c..617404730422 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -26,6 +26,7 @@ struct inet_peer {
 	atomic_t		ip_id_count;	/* IP ID for the next packet */
 	__u32			tcp_ts;
 	__u32			tcp_ts_stamp;
+	struct rcu_head		rcu;
 };
 
 void			inet_initpeers(void) __init;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 035673fd42d4..58fbc7e2475e 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -51,8 +51,8 @@
  *  lookups performed with disabled BHs.
  *
  *  Serialisation issues.
- *  1.  Nodes may appear in the tree only with the pool write lock held.
- *  2.  Nodes may disappear from the tree only with the pool write lock held
+ *  1.  Nodes may appear in the tree only with the pool lock held.
+ *  2.  Nodes may disappear from the tree only with the pool lock held
  *      AND reference count being 0.
  *  3.  Nodes appears and disappears from unused node list only under
  *      "inet_peer_unused_lock".
@@ -80,11 +80,11 @@ static const struct inet_peer peer_fake_node = {
 
 static struct {
 	struct inet_peer *root;
-	rwlock_t	lock;
+	spinlock_t	lock;
 	int		total;
 } peers = {
 	.root		= peer_avl_empty,
-	.lock		= __RW_LOCK_UNLOCKED(peers.lock),
+	.lock		= __SPIN_LOCK_UNLOCKED(peers.lock),
 	.total		= 0,
 };
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
@@ -129,7 +129,7 @@ void __init inet_initpeers(void)
 
 	peer_cachep = kmem_cache_create("inet_peer_cache",
 			sizeof(struct inet_peer),
-			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+			0, SLAB_PANIC,
 			NULL);
 
 	/* All the timers, started at system startup tend
@@ -153,16 +153,13 @@ static void unlink_from_unused(struct inet_peer *p)
 
 /*
  * Called with local BH disabled and the pool lock held.
- * _stack is known to be NULL or not at compile time,
- * so compiler will optimize the if (_stack) tests.
  */
 #define lookup(_daddr, _stack) 					\
 ({								\
 	struct inet_peer *u, **v;				\
-	if (_stack != NULL) {					\
-		stackptr = _stack;				\
-		*stackptr++ = &peers.root;			\
-	}							\
+								\
+	stackptr = _stack;					\
+	*stackptr++ = &peers.root;				\
 	for (u = peers.root; u != peer_avl_empty; ) {		\
 		if (_daddr == u->v4daddr)			\
 			break;					\
@@ -170,14 +167,41 @@ static void unlink_from_unused(struct inet_peer *p)
 			v = &u->avl_left;			\
 		else						\
 			v = &u->avl_right;			\
-		if (_stack != NULL)				\
-			*stackptr++ = v;			\
+		*stackptr++ = v;				\
 		u = *v;						\
 	}							\
 	u;							\
 })
 
-/* Called with local BH disabled and the pool write lock held. */
+/*
+ * Called with rcu_read_lock_bh()
+ * Because we hold no lock against a writer, its quite possible we fall
+ * in an endless loop.
+ * But every pointer we follow is guaranteed to be valid thanks to RCU.
+ * We exit from this function if number of links exceeds PEER_MAXDEPTH
+ */
+static struct inet_peer *lookup_rcu_bh(__be32 daddr)
+{
+	struct inet_peer *u = rcu_dereference_bh(peers.root);
+	int count = 0;
+
+	while (u != peer_avl_empty) {
+		if (daddr == u->v4daddr) {
+			if (unlikely(!atomic_inc_not_zero(&u->refcnt)))
+				u = NULL;
+			return u;
+		}
+		if ((__force __u32)daddr < (__force __u32)u->v4daddr)
+			u = rcu_dereference_bh(u->avl_left);
+		else
+			u = rcu_dereference_bh(u->avl_right);
+		if (unlikely(++count == PEER_MAXDEPTH))
+			break;
+	}
+	return NULL;
+}
+
+/* Called with local BH disabled and the pool lock held. */
 #define lookup_rightempty(start)				\
 ({								\
 	struct inet_peer *u, **v;				\
@@ -191,9 +215,10 @@ static void unlink_from_unused(struct inet_peer *p)
 	u;							\
 })
 
-/* Called with local BH disabled and the pool write lock held.
+/* Called with local BH disabled and the pool lock held.
  * Variable names are the proof of operation correctness.
- * Look into mm/map_avl.c for more detail description of the ideas.  */
+ * Look into mm/map_avl.c for more detail description of the ideas.
+ */
 static void peer_avl_rebalance(struct inet_peer **stack[],
 		struct inet_peer ***stackend)
 {
@@ -269,16 +294,22 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
 	}
 }
 
-/* Called with local BH disabled and the pool write lock held. */
+/* Called with local BH disabled and the pool lock held. */
 #define link_to_pool(n)						\
 do {								\
 	n->avl_height = 1;					\
 	n->avl_left = peer_avl_empty;				\
 	n->avl_right = peer_avl_empty;				\
+	smp_wmb(); /* lockless readers can catch us now */	\
 	**--stackptr = n;					\
 	peer_avl_rebalance(stack, stackptr);			\
 } while (0)
 
+static void inetpeer_free_rcu(struct rcu_head *head)
+{
+	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
+}
+
 /* May be called with local BH enabled. */
 static void unlink_from_pool(struct inet_peer *p)
 {
@@ -286,13 +317,13 @@ static void unlink_from_pool(struct inet_peer *p)
 
 	do_free = 0;
 
-	write_lock_bh(&peers.lock);
+	spin_lock_bh(&peers.lock);
 	/* Check the reference counter.  It was artificially incremented by 1
-	 * in cleanup() function to prevent sudden disappearing.  If the
-	 * reference count is still 1 then the node is referenced only as `p'
-	 * here and from the pool.  So under the exclusive pool lock it's safe
-	 * to remove the node and free it later. */
-	if (atomic_read(&p->refcnt) == 1) {
+	 * in cleanup() function to prevent sudden disappearing.  If we can
+	 * atomically (because of lockless readers) take this last reference,
+	 * it's safe to remove the node and free it later.
+	 */
+	if (atomic_cmpxchg(&p->refcnt, 1, 0) == 1) {
 		struct inet_peer **stack[PEER_MAXDEPTH];
 		struct inet_peer ***stackptr, ***delp;
 		if (lookup(p->v4daddr, stack) != p)
@@ -321,17 +352,18 @@ static void unlink_from_pool(struct inet_peer *p)
 		peers.total--;
 		do_free = 1;
 	}
-	write_unlock_bh(&peers.lock);
+	spin_unlock_bh(&peers.lock);
 
 	if (do_free)
-		kmem_cache_free(peer_cachep, p);
+		call_rcu_bh(&p->rcu, inetpeer_free_rcu);
 	else
 		/* The node is used again.  Decrease the reference counter
 		 * back.  The loop "cleanup -> unlink_from_unused
 		 *   -> unlink_from_pool -> putpeer -> link_to_unused
 		 *   -> cleanup (for the same node)"
 		 * doesn't really exist because the entry will have a
-		 * recent deletion time and will not be cleaned again soon. */
+		 * recent deletion time and will not be cleaned again soon.
+		 */
 		inet_putpeer(p);
 }
 
@@ -375,62 +407,56 @@ static int cleanup_once(unsigned long ttl)
 /* Called with or without local BH being disabled. */
 struct inet_peer *inet_getpeer(__be32 daddr, int create)
 {
-	struct inet_peer *p, *n;
+	struct inet_peer *p;
 	struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
 
-	/* Look up for the address quickly. */
-	read_lock_bh(&peers.lock);
-	p = lookup(daddr, NULL);
-	if (p != peer_avl_empty)
-		atomic_inc(&p->refcnt);
-	read_unlock_bh(&peers.lock);
+	/* Look up for the address quickly, lockless.
+	 * Because of a concurrent writer, we might not find an existing entry.
+	 */
+	rcu_read_lock_bh();
+	p = lookup_rcu_bh(daddr);
+	rcu_read_unlock_bh();
+
+	if (p) {
+		/* The existing node has been found.
+		 * Remove the entry from unused list if it was there.
+		 */
+		unlink_from_unused(p);
+		return p;
+	}
 
+	/* retry an exact lookup, taking the lock before.
+	 * At least, nodes should be hot in our cache.
+	 */
+	spin_lock_bh(&peers.lock);
+	p = lookup(daddr, stack);
 	if (p != peer_avl_empty) {
-		/* The existing node has been found. */
+		atomic_inc(&p->refcnt);
+		spin_unlock_bh(&peers.lock);
 		/* Remove the entry from unused list if it was there. */
 		unlink_from_unused(p);
 		return p;
 	}
-
-	if (!create)
-		return NULL;
-
-	/* Allocate the space outside the locked region. */
-	n = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
-	if (n == NULL)
-		return NULL;
-	n->v4daddr = daddr;
-	atomic_set(&n->refcnt, 1);
-	atomic_set(&n->rid, 0);
-	atomic_set(&n->ip_id_count, secure_ip_id(daddr));
-	n->tcp_ts_stamp = 0;
-
-	write_lock_bh(&peers.lock);
-	/* Check if an entry has suddenly appeared. */
-	p = lookup(daddr, stack);
-	if (p != peer_avl_empty)
-		goto out_free;
-
-	/* Link the node. */
-	link_to_pool(n);
-	INIT_LIST_HEAD(&n->unused);
-	peers.total++;
-	write_unlock_bh(&peers.lock);
+	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
+	if (p) {
+		p->v4daddr = daddr;
+		atomic_set(&p->refcnt, 1);
+		atomic_set(&p->rid, 0);
+		atomic_set(&p->ip_id_count, secure_ip_id(daddr));
+		p->tcp_ts_stamp = 0;
+		INIT_LIST_HEAD(&p->unused);
+
+
+		/* Link the node. */
+		link_to_pool(p);
+		peers.total++;
+	}
+	spin_unlock_bh(&peers.lock);
 
 	if (peers.total >= inet_peer_threshold)
 		/* Remove one less-recently-used entry. */
 		cleanup_once(0);
 
-	return n;
-
-out_free:
-	/* The appropriate node is already in the pool. */
-	atomic_inc(&p->refcnt);
-	write_unlock_bh(&peers.lock);
-	/* Remove the entry from unused list if it was there. */
-	unlink_from_unused(p);
-	/* Free preallocated the preallocated node. */
-	kmem_cache_free(peer_cachep, n);
 	return p;
 }
 
-- 
cgit v1.2.3-55-g7522


From 317fe0e6c5dc9448bcef41a2e31fecfd3dba7f55 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 16 Jun 2010 04:52:13 +0000
Subject: inetpeer: restore small inet_peer structures

Addition of rcu_head to struct inet_peer added 16bytes on 64bit arches.

Thats a bit unfortunate, since old size was exactly 64 bytes.

This can be solved, using an union between this rcu_head an four fields,
that are normally used only when a refcount is taken on inet_peer.
rcu_head is used only when refcnt=-1, right before structure freeing.

Add a inet_peer_refcheck() function to check this assertion for a while.

We can bring back SLAB_HWCACHE_ALIGN qualifier in kmem cache creation.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 31 ++++++++++++++++++++++++++-----
 net/ipv4/inetpeer.c    |  4 ++--
 net/ipv4/route.c       |  1 +
 net/ipv4/tcp_ipv4.c    | 11 +++++++----
 4 files changed, 36 insertions(+), 11 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 617404730422..417d0c894f29 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -22,11 +22,21 @@ struct inet_peer {
 	__u32			dtime;		/* the time of last use of not
 						 * referenced entries */
 	atomic_t		refcnt;
-	atomic_t		rid;		/* Frag reception counter */
-	atomic_t		ip_id_count;	/* IP ID for the next packet */
-	__u32			tcp_ts;
-	__u32			tcp_ts_stamp;
-	struct rcu_head		rcu;
+	/*
+	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
+	 * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
+	 * We can share memory with rcu_head to keep inet_peer small
+	 * (less then 64 bytes)
+	 */
+	union {
+		struct {
+			atomic_t	rid;		/* Frag reception counter */
+			atomic_t	ip_id_count;	/* IP ID for the next packet */
+			__u32		tcp_ts;
+			__u32		tcp_ts_stamp;
+		};
+		struct rcu_head         rcu;
+	};
 };
 
 void			inet_initpeers(void) __init;
@@ -37,10 +47,21 @@ struct inet_peer	*inet_getpeer(__be32 daddr, int create);
 /* can be called from BH context or outside */
 extern void inet_putpeer(struct inet_peer *p);
 
+/*
+ * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
+ * tcp_ts_stamp if no refcount is taken on inet_peer
+ */
+static inline void inet_peer_refcheck(const struct inet_peer *p)
+{
+	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
+}
+
+
 /* can be called with or without local BH being disabled */
 static inline __u16	inet_getid(struct inet_peer *p, int more)
 {
 	more++;
+	inet_peer_refcheck(p);
 	return atomic_add_return(more, &p->ip_id_count) - more;
 }
 
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 349249fad2db..9ffa24b9a804 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -64,7 +64,7 @@
  *		   usually under some other lock to prevent node disappearing
  *		dtime: unused node list lock
  *		v4daddr: unchangeable
- *		ip_id_count: idlock
+ *		ip_id_count: atomic value (no lock needed)
  */
 
 static struct kmem_cache *peer_cachep __read_mostly;
@@ -129,7 +129,7 @@ void __init inet_initpeers(void)
 
 	peer_cachep = kmem_cache_create("inet_peer_cache",
 			sizeof(struct inet_peer),
-			0, SLAB_PANIC,
+			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
 
 	/* All the timers, started at system startup tend
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a291edbbc97f..03430de46166 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2881,6 +2881,7 @@ static int rt_fill_info(struct net *net,
 	error = rt->dst.error;
 	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
 	if (rt->peer) {
+		inet_peer_refcheck(rt->peer);
 		id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
 		if (rt->peer->tcp_ts_stamp) {
 			ts = rt->peer->tcp_ts;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7f9515c0379f..2e41e6f92968 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -204,10 +204,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
 		 * when trying new connection.
 		 */
-		if (peer != NULL &&
-		    (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
-			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
-			tp->rx_opt.ts_recent = peer->tcp_ts;
+		if (peer) {
+			inet_peer_refcheck(peer);
+			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
+				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+				tp->rx_opt.ts_recent = peer->tcp_ts;
+			}
 		}
 	}
 
@@ -1351,6 +1353,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
 		    peer->v4daddr == saddr) {
+			inet_peer_refcheck(peer);
 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
 			    (s32)(peer->tcp_ts - req->ts_recent) >
 							TCP_PAWS_WINDOW) {
-- 
cgit v1.2.3-55-g7522


From 8c76368174ed2359739f1b7b8a9c042b1ef839c4 Mon Sep 17 00:00:00 2001
From: Florian Westphal
Date: Wed, 16 Jun 2010 14:42:15 -0700
Subject: syncookies: check decoded options against sysctl settings

Discard the ACK if we find options that do not match current sysctl
settings.

Previously it was possible to create a connection with sack, wscale,
etc. enabled even if the feature was disabled via sysctl.

Also remove an unneeded call to tcp_sack_reset() in
cookie_check_timestamp: Both call sites (cookie_v4_check,
cookie_v6_check) zero "struct tcp_options_received", hand it to
tcp_parse_options() (which does not change tcp_opt->num_sacks/dsack)
and then call cookie_check_timestamp().

Even if num_sacks/dsacks were changed, the structure is allocated on
the stack and after cookie_check_timestamp returns only a few selected
members are copied to the inet_request_sock.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  2 +-
 net/ipv4/syncookies.c | 25 +++++++++++++++++++------
 net/ipv6/syncookies.c |  4 ++--
 3 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9e68e25c8b82..18c246c9b009 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -464,7 +464,7 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 				     __u16 *mss);
 
 extern __u32 cookie_init_timestamp(struct request_sock *req);
-extern void cookie_check_timestamp(struct tcp_options_received *tcp_opt);
+extern bool cookie_check_timestamp(struct tcp_options_received *tcp_opt);
 
 /* From net/ipv6/syncookies.c */
 extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 02bef6aa8b30..51b5662545d6 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -230,23 +230,36 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  * The lowest 4 bits are for snd_wscale
  * The next 4 lsb are for rcv_wscale
  * The next lsb is for sack_ok
+ *
+ * return false if we decode an option that should not be.
  */
-void cookie_check_timestamp(struct tcp_options_received *tcp_opt)
+bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
 {
 	/* echoed timestamp, 9 lowest bits contain options */
 	u32 options = tcp_opt->rcv_tsecr & TSMASK;
 
+	if (!tcp_opt->saw_tstamp)  {
+		tcp_clear_options(tcp_opt);
+		return true;
+	}
+
+	if (!sysctl_tcp_timestamps)
+		return false;
+
 	tcp_opt->snd_wscale = options & 0xf;
 	options >>= 4;
 	tcp_opt->rcv_wscale = options & 0xf;
 
 	tcp_opt->sack_ok = (options >> 4) & 0x1;
 
-	if (tcp_opt->sack_ok)
-		tcp_sack_reset(tcp_opt);
+	if (tcp_opt->sack_ok && !sysctl_tcp_sack)
+		return false;
 
-	if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale)
+	if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) {
 		tcp_opt->wscale_ok = 1;
+		return sysctl_tcp_window_scaling != 0;
+	}
+	return true;
 }
 EXPORT_SYMBOL(cookie_check_timestamp);
 
@@ -281,8 +294,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
+	if (!cookie_check_timestamp(&tcp_opt))
+		goto out;
 
 	ret = NULL;
 	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 70d330f8c990..c7ee57421ece 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -180,8 +180,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
+	if (!cookie_check_timestamp(&tcp_opt))
+		goto out;
 
 	ret = NULL;
 	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
-- 
cgit v1.2.3-55-g7522


From 812e876e842488221aa54cb4587a8a33445cfa9e Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Sun, 13 Jun 2010 03:27:04 +0000
Subject: scm: Reorder scm_cookie.

Reorder the fields in scm_cookie so they pack better on 64bit.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/scm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/scm.h b/include/net/scm.h
index 8360e47aa7e3..17d9d2e75ff1 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -19,8 +19,8 @@ struct scm_fp_list {
 };
 
 struct scm_cookie {
-	struct ucred		creds;		/* Skb credentials	*/
 	struct scm_fp_list	*fp;		/* Passed files		*/
+	struct ucred		creds;		/* Skb credentials	*/
 #ifdef CONFIG_SECURITY_NETWORK
 	u32			secid;		/* Passed security ID 	*/
 #endif
-- 
cgit v1.2.3-55-g7522


From 109f6e39fa07c48f580125f531f46cb7c245b528 Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Sun, 13 Jun 2010 03:30:14 +0000
Subject: af_unix: Allow SO_PEERCRED to work across namespaces.

Use struct pid and struct cred to store the peer credentials on struct
sock.  This gives enough information to convert the peer credential
information to a value relative to whatever namespace the socket is in
at the time.

This removes nasty surprises when using SO_PEERCRED on socket
connetions where the processes on either side are in different pid and
user namespaces.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  3 ++-
 net/core/sock.c    | 18 ++++++++++++------
 net/unix/af_unix.c | 37 ++++++++++++++++++++++++++++---------
 3 files changed, 42 insertions(+), 16 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index f8acf38f092f..4f26f2f83be9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -295,7 +295,8 @@ struct sock {
 	unsigned short		sk_ack_backlog;
 	unsigned short		sk_max_ack_backlog;
 	__u32			sk_priority;
-	struct ucred		sk_peercred;
+	struct pid		*sk_peer_pid;
+	const struct cred	*sk_peer_cred;
 	long			sk_rcvtimeo;
 	long			sk_sndtimeo;
 	struct sk_filter      	*sk_filter;
diff --git a/net/core/sock.c b/net/core/sock.c
index db8335ad7559..0229d5566a46 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -915,11 +915,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_PEERCRED:
-		if (len > sizeof(sk->sk_peercred))
-			len = sizeof(sk->sk_peercred);
-		if (copy_to_user(optval, &sk->sk_peercred, len))
+	{
+		struct ucred peercred;
+		if (len > sizeof(peercred))
+			len = sizeof(peercred);
+		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+		if (copy_to_user(optval, &peercred, len))
 			return -EFAULT;
 		goto lenout;
+	}
 
 	case SO_PEERNAME:
 	{
@@ -1133,6 +1137,9 @@ static void __sk_free(struct sock *sk)
 		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
 		       __func__, atomic_read(&sk->sk_omem_alloc));
 
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	put_pid(sk->sk_peer_pid);
 	put_net(sock_net(sk));
 	sk_prot_free(sk->sk_prot_creator, sk);
 }
@@ -1968,9 +1975,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_sndmsg_page	=	NULL;
 	sk->sk_sndmsg_off	=	0;
 
-	sk->sk_peercred.pid 	=	0;
-	sk->sk_peercred.uid	=	-1;
-	sk->sk_peercred.gid	=	-1;
+	sk->sk_peer_pid 	=	NULL;
+	sk->sk_peer_cred	=	NULL;
 	sk->sk_write_pending	=	0;
 	sk->sk_rcvlowat		=	1;
 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index fef2cc5e9d2b..e1f1349fae86 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -450,11 +450,31 @@ static int unix_release_sock(struct sock *sk, int embrion)
 	return 0;
 }
 
+static void init_peercred(struct sock *sk)
+{
+	put_pid(sk->sk_peer_pid);
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	sk->sk_peer_pid  = get_pid(task_tgid(current));
+	sk->sk_peer_cred = get_current_cred();
+}
+
+static void copy_peercred(struct sock *sk, struct sock *peersk)
+{
+	put_pid(sk->sk_peer_pid);
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
+	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+}
+
 static int unix_listen(struct socket *sock, int backlog)
 {
 	int err;
 	struct sock *sk = sock->sk;
 	struct unix_sock *u = unix_sk(sk);
+	struct pid *old_pid = NULL;
+	const struct cred *old_cred = NULL;
 
 	err = -EOPNOTSUPP;
 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
@@ -470,12 +490,14 @@ static int unix_listen(struct socket *sock, int backlog)
 	sk->sk_max_ack_backlog	= backlog;
 	sk->sk_state		= TCP_LISTEN;
 	/* set credentials so connect can copy them */
-	sk->sk_peercred.pid	= task_tgid_vnr(current);
-	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
+	init_peercred(sk);
 	err = 0;
 
 out_unlock:
 	unix_state_unlock(sk);
+	put_pid(old_pid);
+	if (old_cred)
+		put_cred(old_cred);
 out:
 	return err;
 }
@@ -1140,8 +1162,7 @@ restart:
 	unix_peer(newsk)	= sk;
 	newsk->sk_state		= TCP_ESTABLISHED;
 	newsk->sk_type		= sk->sk_type;
-	newsk->sk_peercred.pid	= task_tgid_vnr(current);
-	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
+	init_peercred(newsk);
 	newu = unix_sk(newsk);
 	newsk->sk_wq		= &newu->peer_wq;
 	otheru = unix_sk(other);
@@ -1157,7 +1178,7 @@ restart:
 	}
 
 	/* Set credentials */
-	sk->sk_peercred = other->sk_peercred;
+	copy_peercred(sk, other);
 
 	sock->state	= SS_CONNECTED;
 	sk->sk_state	= TCP_ESTABLISHED;
@@ -1199,10 +1220,8 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
 	sock_hold(skb);
 	unix_peer(ska) = skb;
 	unix_peer(skb) = ska;
-	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
-	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
-	ska->sk_peercred.uid = skb->sk_peercred.uid;
-	ska->sk_peercred.gid = skb->sk_peercred.gid;
+	init_peercred(ska);
+	init_peercred(skb);
 
 	if (ska->sk_type != SOCK_DGRAM) {
 		ska->sk_state = TCP_ESTABLISHED;
-- 
cgit v1.2.3-55-g7522


From 257b5358b32f17e0603b6ff57b13610b0e02348f Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Sun, 13 Jun 2010 03:32:34 +0000
Subject: scm: Capture the full credentials of the scm sender.

Start capturing not only the userspace pid, uid and gid values of the
sending process but also the struct pid and struct cred of the sending
process as well.

This is in preparation for properly supporting SCM_CREDENTIALS for
sockets that have different uid and/or pid namespaces at the different
ends.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Serge E. Hallyn <serge@hallyn.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/scm.h | 28 ++++++++++++++++++++++++----
 net/core/scm.c    | 24 ++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/scm.h b/include/net/scm.h
index 17d9d2e75ff1..31656506d967 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -19,6 +19,8 @@ struct scm_fp_list {
 };
 
 struct scm_cookie {
+	struct pid		*pid;		/* Skb credentials */
+	const struct cred	*cred;
 	struct scm_fp_list	*fp;		/* Passed files		*/
 	struct ucred		creds;		/* Skb credentials	*/
 #ifdef CONFIG_SECURITY_NETWORK
@@ -42,8 +44,27 @@ static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_co
 { }
 #endif /* CONFIG_SECURITY_NETWORK */
 
+static __inline__ void scm_set_cred(struct scm_cookie *scm,
+				    struct pid *pid, const struct cred *cred)
+{
+	scm->pid  = get_pid(pid);
+	scm->cred = get_cred(cred);
+	cred_to_ucred(pid, cred, &scm->creds);
+}
+
+static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
+{
+	put_pid(scm->pid);
+	scm->pid  = NULL;
+
+	if (scm->cred)
+		put_cred(scm->cred);
+	scm->cred = NULL;
+}
+
 static __inline__ void scm_destroy(struct scm_cookie *scm)
 {
+	scm_destroy_cred(scm);
 	if (scm && scm->fp)
 		__scm_destroy(scm);
 }
@@ -51,10 +72,7 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
 static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
 			       struct scm_cookie *scm)
 {
-	struct task_struct *p = current;
-	scm->creds.uid = current_uid();
-	scm->creds.gid = current_gid();
-	scm->creds.pid = task_tgid_vnr(p);
+	scm_set_cred(scm, task_tgid(current), current_cred());
 	scm->fp = NULL;
 	unix_get_peersec_dgram(sock, scm);
 	if (msg->msg_controllen <= 0)
@@ -96,6 +114,8 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
 	if (test_bit(SOCK_PASSCRED, &sock->flags))
 		put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(scm->creds), &scm->creds);
 
+	scm_destroy_cred(scm);
+
 	scm_passec(sock, msg, scm);
 
 	if (!scm->fp)
diff --git a/net/core/scm.c b/net/core/scm.c
index b88f6f9d0b97..681c976307b5 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -170,6 +170,30 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 			err = scm_check_creds(&p->creds);
 			if (err)
 				goto error;
+
+			if (pid_vnr(p->pid) != p->creds.pid) {
+				struct pid *pid;
+				err = -ESRCH;
+				pid = find_get_pid(p->creds.pid);
+				if (!pid)
+					goto error;
+				put_pid(p->pid);
+				p->pid = pid;
+			}
+
+			if ((p->cred->euid != p->creds.uid) ||
+				(p->cred->egid != p->creds.gid)) {
+				struct cred *cred;
+				err = -ENOMEM;
+				cred = prepare_creds();
+				if (!cred)
+					goto error;
+
+				cred->uid = cred->euid = p->creds.uid;
+				cred->gid = cred->egid = p->creds.uid;
+				put_cred(p->cred);
+				p->cred = cred;
+			}
 			break;
 		default:
 			goto error;
-- 
cgit v1.2.3-55-g7522


From 7361c36c5224519b258219fe3d0e8abc865d8134 Mon Sep 17 00:00:00 2001
From: Eric W. Biederman
Date: Sun, 13 Jun 2010 03:34:33 +0000
Subject: af_unix: Allow credentials to work across user and pid namespaces.

In unix_skb_parms store pointers to struct pid and struct cred instead
of raw uid, gid, and pid values, then translate the credentials on
reception into values that are meaningful in the receiving processes
namespaces.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_unix.h |  4 ++--
 net/unix/af_unix.c    | 53 ++++++++++++++++++++++++++++++---------------------
 2 files changed, 33 insertions(+), 24 deletions(-)

(limited to 'include/net')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 20725e213aee..90c9e2872f27 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -23,7 +23,8 @@ struct unix_address {
 };
 
 struct unix_skb_parms {
-	struct ucred		creds;		/* Skb credentials	*/
+	struct pid		*pid;		/* Skb credentials	*/
+	const struct cred	*cred;
 	struct scm_fp_list	*fp;		/* Passed files		*/
 #ifdef CONFIG_SECURITY_NETWORK
 	u32			secid;		/* Security ID		*/
@@ -31,7 +32,6 @@ struct unix_skb_parms {
 };
 
 #define UNIXCB(skb) 	(*(struct unix_skb_parms *)&((skb)->cb))
-#define UNIXCREDS(skb)	(&UNIXCB((skb)).creds)
 #define UNIXSID(skb)	(&UNIXCB((skb)).secid)
 
 #define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e1f1349fae86..5fe9d6fe08b8 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1316,18 +1316,20 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	int i;
 
 	scm->fp = UNIXCB(skb).fp;
-	skb->destructor = sock_wfree;
 	UNIXCB(skb).fp = NULL;
 
 	for (i = scm->fp->count-1; i >= 0; i--)
 		unix_notinflight(scm->fp->fp[i]);
 }
 
-static void unix_destruct_fds(struct sk_buff *skb)
+static void unix_destruct_scm(struct sk_buff *skb)
 {
 	struct scm_cookie scm;
 	memset(&scm, 0, sizeof(scm));
-	unix_detach_fds(&scm, skb);
+	scm.pid  = UNIXCB(skb).pid;
+	scm.cred = UNIXCB(skb).cred;
+	if (UNIXCB(skb).fp)
+		unix_detach_fds(&scm, skb);
 
 	/* Alas, it calls VFS */
 	/* So fscking what? fput() had been SMP-safe since the last Summer */
@@ -1350,10 +1352,22 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 
 	for (i = scm->fp->count-1; i >= 0; i--)
 		unix_inflight(scm->fp->fp[i]);
-	skb->destructor = unix_destruct_fds;
 	return 0;
 }
 
+static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
+{
+	int err = 0;
+	UNIXCB(skb).pid  = get_pid(scm->pid);
+	UNIXCB(skb).cred = get_cred(scm->cred);
+	UNIXCB(skb).fp = NULL;
+	if (scm->fp && send_fds)
+		err = unix_attach_fds(scm, skb);
+
+	skb->destructor = unix_destruct_scm;
+	return err;
+}
+
 /*
  *	Send AF_UNIX data.
  */
@@ -1410,12 +1424,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (skb == NULL)
 		goto out;
 
-	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-	if (siocb->scm->fp) {
-		err = unix_attach_fds(siocb->scm, skb);
-		if (err)
-			goto out_free;
-	}
+	err = unix_scm_to_skb(siocb->scm, skb, true);
+	if (err)
+		goto out_free;
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1585,16 +1596,14 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		 */
 		size = min_t(int, size, skb_tailroom(skb));
 
-		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
+
 		/* Only send the fds in the first buffer */
-		if (siocb->scm->fp && !fds_sent) {
-			err = unix_attach_fds(siocb->scm, skb);
-			if (err) {
-				kfree_skb(skb);
-				goto out_err;
-			}
-			fds_sent = true;
+		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
+		if (err) {
+			kfree_skb(skb);
+			goto out_err;
 		}
+		fds_sent = true;
 
 		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
 		if (err) {
@@ -1711,7 +1720,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 		siocb->scm = &tmp_scm;
 		memset(&tmp_scm, 0, sizeof(tmp_scm));
 	}
-	siocb->scm->creds = *UNIXCREDS(skb);
+	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
 	unix_set_secdata(siocb->scm, skb);
 
 	if (!(flags & MSG_PEEK)) {
@@ -1860,14 +1869,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		if (check_creds) {
 			/* Never glue messages from different writers */
-			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
-				   sizeof(siocb->scm->creds)) != 0) {
+			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
+			    (UNIXCB(skb).cred != siocb->scm->cred)) {
 				skb_queue_head(&sk->sk_receive_queue, skb);
 				break;
 			}
 		} else {
 			/* Copy credentials */
-			siocb->scm->creds = *UNIXCREDS(skb);
+			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
 			check_creds = 1;
 		}
 
-- 
cgit v1.2.3-55-g7522


From c68cd6cc21eb329c47ff020ff7412bf58176984e Mon Sep 17 00:00:00 2001
From: Patrick McHardy
Date: Thu, 17 Jun 2010 06:12:26 +0200
Subject: netfilter: nf_nat: support user-specified SNAT rules in LOCAL_IN

2.6.34 introduced 'conntrack zones' to deal with cases where packets
from multiple identical networks are handled by conntrack/NAT. Packets
are looped through veth devices, during which they are NATed to private
addresses, after which they can continue normally through the stack
and possibly have NAT rules applied a second time.

This works well, but is needlessly complicated for cases where only
a single SNAT/DNAT mapping needs to be applied to these packets. In that
case, all that needs to be done is to assign each network to a seperate
zone and perform NAT as usual. However this doesn't work for packets
destined for the machine performing NAT itself since its corrently not
possible to configure SNAT mappings for the LOCAL_IN chain.

This patch adds a new INPUT chain to the NAT table and changes the
targets performing SNAT to be usable in that chain.

Example usage with two identical networks (192.168.0.0/24) on eth0/eth1:

iptables -t raw -A PREROUTING -i eth0 -j CT --zone 1
iptables -t raw -A PREROUTING -i eth0 -j MARK --set-mark 1
iptables -t raw -A PREROUTING -i eth1 -j CT --zone 2
iptabels -t raw -A PREROUTING -i eth1 -j MARK --set-mark 2

iptables -t nat -A INPUT       -m mark --mark 1 -j NETMAP --to 10.0.0.0/24
iptables -t nat -A POSTROUTING -m mark --mark 1 -j NETMAP --to 10.0.0.0/24
iptables -t nat -A INPUT       -m mark --mark 2 -j NETMAP --to 10.0.1.0/24
iptables -t nat -A POSTROUTING -m mark --mark 2 -j NETMAP --to 10.0.1.0/24

iptables -t raw -A PREROUTING -d 10.0.0.0/24 -j CT --zone 1
iptables -t raw -A OUTPUT     -d 10.0.0.0/24 -j CT --zone 1
iptables -t raw -A PREROUTING -d 10.0.1.0/24 -j CT --zone 2
iptables -t raw -A OUTPUT     -d 10.0.1.0/24 -j CT --zone 2

iptables -t nat -A PREROUTING -d 10.0.0.0/24 -j NETMAP --to 192.168.0.0/24
iptables -t nat -A OUTPUT     -d 10.0.0.0/24 -j NETMAP --to 192.168.0.0/24
iptables -t nat -A PREROUTING -d 10.0.1.0/24 -j NETMAP --to 192.168.0.0/24
iptables -t nat -A OUTPUT     -d 10.0.1.0/24 -j NETMAP --to 192.168.0.0/24

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_nat_rule.h    |  2 --
 net/ipv4/netfilter/ipt_NETMAP.c        |  6 ++++--
 net/ipv4/netfilter/nf_nat_rule.c       | 10 ++++++----
 net/ipv4/netfilter/nf_nat_standalone.c |  8 +-------
 4 files changed, 11 insertions(+), 15 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_nat_rule.h b/include/net/netfilter/nf_nat_rule.h
index e4a18ae361c6..2890bdc4cd92 100644
--- a/include/net/netfilter/nf_nat_rule.h
+++ b/include/net/netfilter/nf_nat_rule.h
@@ -12,6 +12,4 @@ extern int nf_nat_rule_find(struct sk_buff *skb,
 			    const struct net_device *out,
 			    struct nf_conn *ct);
 
-extern unsigned int
-alloc_null_binding(struct nf_conn *ct, unsigned int hooknum);
 #endif /* _NF_NAT_RULE_H */
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index f43867d1697f..6cdb298f1035 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -48,7 +48,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_POST_ROUTING ||
-		     par->hooknum == NF_INET_LOCAL_OUT);
+		     par->hooknum == NF_INET_LOCAL_OUT ||
+		     par->hooknum == NF_INET_LOCAL_IN);
 	ct = nf_ct_get(skb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
@@ -77,7 +78,8 @@ static struct xt_target netmap_tg_reg __read_mostly = {
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) |
 			  (1 << NF_INET_POST_ROUTING) |
-			  (1 << NF_INET_LOCAL_OUT),
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_LOCAL_IN),
 	.checkentry 	= netmap_tg_check,
 	.me 		= THIS_MODULE
 };
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 98ed78281aee..ebbd319f62f5 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -28,7 +28,8 @@
 
 #define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
 			 (1 << NF_INET_POST_ROUTING) | \
-			 (1 << NF_INET_LOCAL_OUT))
+			 (1 << NF_INET_LOCAL_OUT) | \
+			 (1 << NF_INET_LOCAL_IN))
 
 static const struct xt_table nat_table = {
 	.name		= "nat",
@@ -45,7 +46,8 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 	enum ip_conntrack_info ctinfo;
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
-	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
+	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
+		     par->hooknum == NF_INET_LOCAL_IN);
 
 	ct = nf_ct_get(skb, &ctinfo);
 
@@ -99,7 +101,7 @@ static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 	return 0;
 }
 
-unsigned int
+static unsigned int
 alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 {
 	/* Force range to this IP; let proto decide mapping for
@@ -141,7 +143,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
 	.target		= ipt_snat_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
-	.hooks		= 1 << NF_INET_POST_ROUTING,
+	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
 	.checkentry	= ipt_snat_checkentry,
 	.family		= AF_INET,
 };
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 6723c682250d..95481fee8bdb 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -131,13 +131,7 @@ nf_nat_fn(unsigned int hooknum,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			if (hooknum == NF_INET_LOCAL_IN)
-				/* LOCAL_IN hook doesn't have a chain!  */
-				ret = alloc_null_binding(ct, hooknum);
-			else
-				ret = nf_nat_rule_find(skb, hooknum, in, out,
-						       ct);
-
+			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
 			if (ret != NF_ACCEPT)
 				return ret;
 		} else
-- 
cgit v1.2.3-55-g7522


From b1c74247b9e29ae3bfdf133862328c309bc9cf14 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland
Date: Thu, 17 Jun 2010 06:55:38 +0000
Subject: caif: Bugfix not all services uses flow-ctrl.

Flow control is not used by all CAIF services.
The usage of flow control is now part of the gerneal
initialization function for CAIF Services.

Signed-off-by: Sjur Braendeland@stericsson.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/cfsrvl.h |  6 ++++--
 net/caif/cfctrl.c         |  2 +-
 net/caif/cfdbgl.c         |  2 +-
 net/caif/cfdgml.c         |  2 +-
 net/caif/cfrfml.c         | 12 ++++--------
 net/caif/cfsrvl.c         | 14 ++++++++++++--
 net/caif/cfutill.c        |  2 +-
 net/caif/cfveil.c         |  2 +-
 net/caif/cfvidl.c         |  2 +-
 9 files changed, 26 insertions(+), 18 deletions(-)

(limited to 'include/net')

diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h
index 2dc9eb193ecf..5d921f5e18c4 100644
--- a/include/net/caif/cfsrvl.h
+++ b/include/net/caif/cfsrvl.h
@@ -16,6 +16,7 @@ struct cfsrvl {
 	bool open;
 	bool phy_flow_on;
 	bool modem_flow_on;
+	bool supports_flowctrl;
 	struct dev_info dev_info;
 	struct kref ref;
 };
@@ -30,8 +31,9 @@ struct cflayer *cfdbgl_create(u8 linkid, struct dev_info *dev_info);
 bool cfsrvl_phyid_match(struct cflayer *layer, int phyid);
 void cfservl_destroy(struct cflayer *layer);
 void cfsrvl_init(struct cfsrvl *service,
-		 u8 channel_id,
-		 struct dev_info *dev_info);
+			u8 channel_id,
+			struct dev_info *dev_info,
+			bool supports_flowctrl);
 bool cfsrvl_ready(struct cfsrvl *service, int *err);
 u8 cfsrvl_getphyid(struct cflayer *layer);
 
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index fcfda98a5e6d..107c4b2a311e 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -43,7 +43,7 @@ struct cflayer *cfctrl_create(void)
 	memset(&dev_info, 0, sizeof(dev_info));
 	dev_info.id = 0xff;
 	memset(this, 0, sizeof(*this));
-	cfsrvl_init(&this->serv, 0, &dev_info);
+	cfsrvl_init(&this->serv, 0, &dev_info, false);
 	atomic_set(&this->req_seq_no, 1);
 	atomic_set(&this->rsp_seq_no, 1);
 	this->serv.layer.receive = cfctrl_recv;
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index ab6b6dc34cf8..676648cac8dd 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -22,7 +22,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
 	}
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	memset(dbg, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(dbg, channel_id, dev_info);
+	cfsrvl_init(dbg, channel_id, dev_info, false);
 	dbg->layer.receive = cfdbgl_receive;
 	dbg->layer.transmit = cfdbgl_transmit;
 	snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id);
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index 53194840ecb6..32d9f0dc8463 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -30,7 +30,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
 	}
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	memset(dgm, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(dgm, channel_id, dev_info);
+	cfsrvl_init(dgm, channel_id, dev_info, true);
 	dgm->layer.receive = cfdgml_receive;
 	dgm->layer.transmit = cfdgml_transmit;
 	snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id);
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index fd27b172fb5d..689cbfd0e43d 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -23,30 +23,26 @@
 
 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt);
-static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl);
 
 struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info)
 {
 	struct cfsrvl *rfm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+
 	if (!rfm) {
 		pr_warning("CAIF: %s(): Out of memory\n", __func__);
 		return NULL;
 	}
+
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+
 	memset(rfm, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(rfm, channel_id, dev_info);
-	rfm->layer.modemcmd = cfservl_modemcmd;
+	cfsrvl_init(rfm, channel_id, dev_info, false);
 	rfm->layer.receive = cfrfml_receive;
 	rfm->layer.transmit = cfrfml_transmit;
 	snprintf(rfm->layer.name, CAIF_LAYER_NAME_SZ, "rfm%d", channel_id);
 	return &rfm->layer;
 }
 
-static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
-{
-       return -EPROTO;
-}
-
 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
 {
 	u8 tmp;
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 6e5b7079a684..7aa1f03a0151 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -24,8 +24,10 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 				int phyid)
 {
 	struct cfsrvl *service = container_obj(layr);
+
 	caif_assert(layr->up != NULL);
 	caif_assert(layr->up->ctrlcmd != NULL);
+
 	switch (ctrl) {
 	case CAIF_CTRLCMD_INIT_RSP:
 		service->open = true;
@@ -89,9 +91,14 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
 {
 	struct cfsrvl *service = container_obj(layr);
+
 	caif_assert(layr != NULL);
 	caif_assert(layr->dn != NULL);
 	caif_assert(layr->dn->transmit != NULL);
+
+	if (!service->supports_flowctrl)
+		return 0;
+
 	switch (ctrl) {
 	case CAIF_MODEMCMD_FLOW_ON_REQ:
 		{
@@ -153,8 +160,10 @@ void cfservl_destroy(struct cflayer *layer)
 }
 
 void cfsrvl_init(struct cfsrvl *service,
-		 u8 channel_id,
-		 struct dev_info *dev_info)
+			u8 channel_id,
+			struct dev_info *dev_info,
+			bool supports_flowctrl
+			)
 {
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	service->open = false;
@@ -164,6 +173,7 @@ void cfsrvl_init(struct cfsrvl *service,
 	service->layer.ctrlcmd = cfservl_ctrlcmd;
 	service->layer.modemcmd = cfservl_modemcmd;
 	service->dev_info = *dev_info;
+	service->supports_flowctrl = supports_flowctrl;
 	kref_init(&service->ref);
 }
 
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 5fd2c9ea8b42..ce525cac9064 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -31,7 +31,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
 	}
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	memset(util, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(util, channel_id, dev_info);
+	cfsrvl_init(util, channel_id, dev_info, true);
 	util->layer.receive = cfutill_receive;
 	util->layer.transmit = cfutill_transmit;
 	snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1");
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index e04f7d964e83..637cb0eee13c 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -30,7 +30,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
 	}
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	memset(vei, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(vei, channel_id, dev_info);
+	cfsrvl_init(vei, channel_id, dev_info, true);
 	vei->layer.receive = cfvei_receive;
 	vei->layer.transmit = cfvei_transmit;
 	snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id);
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index 89ad4ea239f1..ada6ee2d48f5 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -27,7 +27,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 
 	memset(vid, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(vid, channel_id, dev_info);
+	cfsrvl_init(vid, channel_id, dev_info, false);
 	vid->layer.receive = cfvidl_receive;
 	vid->layer.transmit = cfvidl_transmit;
 	snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1");
-- 
cgit v1.2.3-55-g7522


From a7da1f55a826c621251874e7684c234972fc3216 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland
Date: Thu, 17 Jun 2010 06:55:39 +0000
Subject: caif: Bugfix - RFM must support segmentation.

CAIF Remote File Manager may send or receive more than 4050 bytes.
Due to this The CAIF RFM service have to support segmentation.

Signed-off-by: Sjur Braendeland@stericsson.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/cfsrvl.h |   9 +-
 net/caif/caif_socket.c    |   4 -
 net/caif/cfcnfg.c         |   4 +-
 net/caif/cfpkt_skbuff.c   |   1 -
 net/caif/cfrfml.c         | 314 ++++++++++++++++++++++++++++++++++++++--------
 net/caif/cfsrvl.c         |  13 +-
 6 files changed, 278 insertions(+), 67 deletions(-)

(limited to 'include/net')

diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h
index 5d921f5e18c4..b1fa87ee0992 100644
--- a/include/net/caif/cfsrvl.h
+++ b/include/net/caif/cfsrvl.h
@@ -17,6 +17,7 @@ struct cfsrvl {
 	bool phy_flow_on;
 	bool modem_flow_on;
 	bool supports_flowctrl;
+	void (*release)(struct kref *);
 	struct dev_info dev_info;
 	struct kref ref;
 };
@@ -26,7 +27,8 @@ struct cflayer *cfvei_create(u8 linkid, struct dev_info *dev_info);
 struct cflayer *cfdgml_create(u8 linkid, struct dev_info *dev_info);
 struct cflayer *cfutill_create(u8 linkid, struct dev_info *dev_info);
 struct cflayer *cfvidl_create(u8 linkid, struct dev_info *dev_info);
-struct cflayer *cfrfml_create(u8 linkid, struct dev_info *dev_info);
+struct cflayer *cfrfml_create(u8 linkid, struct dev_info *dev_info,
+				int mtu_size);
 struct cflayer *cfdbgl_create(u8 linkid, struct dev_info *dev_info);
 bool cfsrvl_phyid_match(struct cflayer *layer, int phyid);
 void cfservl_destroy(struct cflayer *layer);
@@ -52,7 +54,10 @@ static inline void cfsrvl_put(struct cflayer *layr)
 	if (layr == NULL)
 		return;
 	s = container_of(layr, struct cfsrvl, layer);
-	kref_put(&s->ref, cfsrvl_release);
+
+	WARN_ON(!s->release);
+	if (s->release)
+		kref_put(&s->ref, s->release);
 }
 
 #endif				/* CFSRVL_H_ */
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 791249316ef3..848ae755cdd6 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -596,10 +596,6 @@ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 	buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM;
 
-	ret = -EMSGSIZE;
-	if (buffer_size > CAIF_MAX_PAYLOAD_SIZE)
-		goto err;
-
 	timeo = sock_sndtimeo(sk, noblock);
 	timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk),
 				1, timeo, &ret);
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 7c81974a45c4..cff2dcb9efe4 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -22,6 +22,7 @@
 #define PHY_NAME_LEN 20
 
 #define container_obj(layr) container_of(layr, struct cfcnfg, layer)
+#define RFM_FRAGMENT_SIZE 4030
 
 /* Information about CAIF physical interfaces held by Config Module in order
  * to manage physical interfaces
@@ -328,7 +329,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 		servicel = cfdgml_create(channel_id, &phyinfo->dev_info);
 		break;
 	case CFCTRL_SRV_RFM:
-		servicel = cfrfml_create(channel_id, &phyinfo->dev_info);
+		servicel = cfrfml_create(channel_id, &phyinfo->dev_info,
+						RFM_FRAGMENT_SIZE);
 		break;
 	case CFCTRL_SRV_UTIL:
 		servicel = cfutill_create(channel_id, &phyinfo->dev_info);
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index a6fdf899741a..318b0f4b416e 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -338,7 +338,6 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
 	u16 dstlen;
 	u16 createlen;
 	if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) {
-		cfpkt_destroy(addpkt);
 		return dstpkt;
 	}
 	if (expectlen > addlen)
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 689cbfd0e43d..4b04d25b6a3f 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -7,98 +7,304 @@
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
+#include <linux/unaligned/le_byteshift.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfsrvl.h>
 #include <net/caif/cfpkt.h>
 
-#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
-
+#define container_obj(layr) container_of(layr, struct cfrfml, serv.layer)
 #define RFM_SEGMENTATION_BIT 0x01
-#define RFM_PAYLOAD  0x00
-#define RFM_CMD_BIT  0x80
-#define RFM_FLOW_OFF 0x81
-#define RFM_FLOW_ON  0x80
-#define RFM_SET_PIN  0x82
-#define RFM_CTRL_PKT_SIZE 1
+#define RFM_HEAD_SIZE 7
 
 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt);
 
-struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info)
+struct cfrfml {
+	struct cfsrvl serv;
+	struct cfpkt *incomplete_frm;
+	int fragment_size;
+	u8  seghead[6];
+	u16 pdu_size;
+	/* Protects serialized processing of packets */
+	spinlock_t sync;
+};
+
+static void cfrfml_release(struct kref *kref)
+{
+	struct cfsrvl *srvl = container_of(kref, struct cfsrvl, ref);
+	struct cfrfml *rfml = container_obj(&srvl->layer);
+
+	if (rfml->incomplete_frm)
+		cfpkt_destroy(rfml->incomplete_frm);
+
+	kfree(srvl);
+}
+
+struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
+					int mtu_size)
 {
-	struct cfsrvl *rfm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+	int tmp;
+	struct cfrfml *this =
+		kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
 
-	if (!rfm) {
+	if (!this) {
 		pr_warning("CAIF: %s(): Out of memory\n", __func__);
 		return NULL;
 	}
 
-	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+	cfsrvl_init(&this->serv, channel_id, dev_info, false);
+	this->serv.release = cfrfml_release;
+	this->serv.layer.receive = cfrfml_receive;
+	this->serv.layer.transmit = cfrfml_transmit;
+
+	/* Round down to closest multiple of 16 */
+	tmp = (mtu_size - RFM_HEAD_SIZE - 6) / 16;
+	tmp *= 16;
+
+	this->fragment_size = tmp;
+	spin_lock_init(&this->sync);
+	snprintf(this->serv.layer.name, CAIF_LAYER_NAME_SZ,
+		"rfm%d", channel_id);
+
+	return &this->serv.layer;
+}
+
+static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead,
+			struct cfpkt *pkt, int *err)
+{
+	struct cfpkt *tmppkt;
+	*err = -EPROTO;
+	/* n-th but not last segment */
+
+	if (cfpkt_extr_head(pkt, seghead, 6) < 0)
+		return NULL;
+
+	/* Verify correct header */
+	if (memcmp(seghead, rfml->seghead, 6) != 0)
+		return NULL;
+
+	tmppkt = cfpkt_append(rfml->incomplete_frm, pkt,
+			rfml->pdu_size + RFM_HEAD_SIZE);
+
+	/* If cfpkt_append failes input pkts are not freed */
+	*err = -ENOMEM;
+	if (tmppkt == NULL)
+		return NULL;
 
-	memset(rfm, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(rfm, channel_id, dev_info, false);
-	rfm->layer.receive = cfrfml_receive;
-	rfm->layer.transmit = cfrfml_transmit;
-	snprintf(rfm->layer.name, CAIF_LAYER_NAME_SZ, "rfm%d", channel_id);
-	return &rfm->layer;
+	*err = 0;
+	return tmppkt;
 }
 
 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
 {
 	u8 tmp;
 	bool segmented;
-	int ret;
+	int err;
+	u8 seghead[6];
+	struct cfrfml *rfml;
+	struct cfpkt *tmppkt = NULL;
+
 	caif_assert(layr->up != NULL);
 	caif_assert(layr->receive != NULL);
+	rfml = container_obj(layr);
+	spin_lock(&rfml->sync);
+
+	err = -EPROTO;
+	if (cfpkt_extr_head(pkt, &tmp, 1) < 0)
+		goto out;
+	segmented = tmp & RFM_SEGMENTATION_BIT;
+
+	if (segmented) {
+		if (rfml->incomplete_frm == NULL) {
+			/* Initial Segment */
+			if (cfpkt_peek_head(pkt, rfml->seghead, 6) < 0)
+				goto out;
+
+			rfml->pdu_size = get_unaligned_le16(rfml->seghead+4);
+
+			if (cfpkt_erroneous(pkt))
+				goto out;
+			rfml->incomplete_frm = pkt;
+			pkt = NULL;
+		} else {
+
+			tmppkt = rfm_append(rfml, seghead, pkt, &err);
+			if (tmppkt == NULL)
+				goto out;
+
+			if (cfpkt_erroneous(tmppkt))
+				goto out;
+
+			rfml->incomplete_frm = tmppkt;
+
+
+			if (cfpkt_erroneous(tmppkt))
+				goto out;
+		}
+		err = 0;
+		goto out;
+	}
+
+	if (rfml->incomplete_frm) {
+
+		/* Last Segment */
+		tmppkt = rfm_append(rfml, seghead, pkt, &err);
+		if (tmppkt == NULL)
+			goto out;
+
+		if (cfpkt_erroneous(tmppkt))
+			goto out;
+
+		rfml->incomplete_frm = NULL;
+		pkt = tmppkt;
+		tmppkt = NULL;
+
+		/* Verify that length is correct */
+		err = EPROTO;
+		if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1)
+			goto out;
+	}
+
+	err = rfml->serv.layer.up->receive(rfml->serv.layer.up, pkt);
+
+out:
+
+	if (err != 0) {
+		if (tmppkt)
+			cfpkt_destroy(tmppkt);
+		if (pkt)
+			cfpkt_destroy(pkt);
+		if (rfml->incomplete_frm)
+			cfpkt_destroy(rfml->incomplete_frm);
+		rfml->incomplete_frm = NULL;
+
+		pr_info("CAIF: %s(): "
+				"Connection error %d triggered on RFM link\n",
+				__func__, err);
+
+		/* Trigger connection error upon failure.*/
+		layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
+					rfml->serv.dev_info.id);
+	}
+	spin_unlock(&rfml->sync);
+	return err;
+}
+
+
+static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
+{
+	caif_assert(!cfpkt_getlen(pkt) < rfml->fragment_size);
+
+	/* Add info for MUX-layer to route the packet out. */
+	cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
 
 	/*
-	 * RFM is taking care of segmentation and stripping of
-	 * segmentation bit.
+	 * To optimize alignment, we add up the size of CAIF header before
+	 * payload.
 	 */
-	if (cfpkt_extr_head(pkt, &tmp, 1) < 0) {
-		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
-		cfpkt_destroy(pkt);
-		return -EPROTO;
-	}
-	segmented = tmp & RFM_SEGMENTATION_BIT;
-	caif_assert(!segmented);
+	cfpkt_info(pkt)->hdr_len = RFM_HEAD_SIZE;
+	cfpkt_info(pkt)->dev_info = &rfml->serv.dev_info;
 
-	ret = layr->up->receive(layr->up, pkt);
-	return ret;
+	return rfml->serv.layer.dn->transmit(rfml->serv.layer.dn, pkt);
 }
 
 static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
 {
-	u8 tmp = 0;
-	int ret;
-	struct cfsrvl *service = container_obj(layr);
+	int err;
+	u8 seg;
+	u8 head[6];
+	struct cfpkt *rearpkt = NULL;
+	struct cfpkt *frontpkt = pkt;
+	struct cfrfml *rfml = container_obj(layr);
 
 	caif_assert(layr->dn != NULL);
 	caif_assert(layr->dn->transmit != NULL);
 
-	if (!cfsrvl_ready(service, &ret))
-		return ret;
+	if (!cfsrvl_ready(&rfml->serv, &err))
+		return err;
+
+	err = -EPROTO;
+	if (cfpkt_getlen(pkt) <= RFM_HEAD_SIZE-1)
+		goto out;
+
+	err = 0;
+	if (cfpkt_getlen(pkt) > rfml->fragment_size + RFM_HEAD_SIZE)
+		err = cfpkt_peek_head(pkt, head, 6);
+
+	if (err < 0)
+		goto out;
+
+	while (cfpkt_getlen(frontpkt) > rfml->fragment_size + RFM_HEAD_SIZE) {
+
+		seg = 1;
+		err = -EPROTO;
+
+		if (cfpkt_add_head(frontpkt, &seg, 1) < 0)
+			goto out;
+		/*
+		 * On OOM error cfpkt_split returns NULL.
+		 *
+		 * NOTE: Segmented pdu is not correctly aligned.
+		 * This has negative performance impact.
+		 */
+
+		rearpkt = cfpkt_split(frontpkt, rfml->fragment_size);
+		if (rearpkt == NULL)
+			goto out;
+
+		err = cfrfml_transmit_segment(rfml, frontpkt);
+
+		if (err != 0)
+			goto out;
+		frontpkt = rearpkt;
+		rearpkt = NULL;
+
+		err = -ENOMEM;
+		if (frontpkt == NULL)
+			goto out;
+		err = -EPROTO;
+		if (cfpkt_add_head(frontpkt, head, 6) < 0)
+			goto out;
 
-	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
-		pr_err("CAIF: %s():Packet too large - size=%d\n",
-			__func__, cfpkt_getlen(pkt));
-		return -EOVERFLOW;
 	}
-	if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
-		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
-		return -EPROTO;
+
+	seg = 0;
+	err = -EPROTO;
+
+	if (cfpkt_add_head(frontpkt, &seg, 1) < 0)
+		goto out;
+
+	err = cfrfml_transmit_segment(rfml, frontpkt);
+
+	frontpkt = NULL;
+out:
+
+	if (err != 0) {
+		pr_info("CAIF: %s(): "
+				"Connection error %d triggered on RFM link\n",
+				__func__, err);
+		/* Trigger connection error upon failure.*/
+
+		layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
+					rfml->serv.dev_info.id);
+
+		if (rearpkt)
+			cfpkt_destroy(rearpkt);
+
+		if (frontpkt && frontpkt != pkt) {
+
+			cfpkt_destroy(frontpkt);
+			/*
+			 * Socket layer will free the original packet,
+			 * but this packet may already be sent and
+			 * freed. So we have to return 0 in this case
+			 * to avoid socket layer to re-free this packet.
+			 * The return of shutdown indication will
+			 * cause connection to be invalidated anyhow.
+			 */
+			err = 0;
+		}
 	}
 
-	/* Add info for MUX-layer to route the packet out. */
-	cfpkt_info(pkt)->channel_id = service->layer.id;
-	/*
-	 * To optimize alignment, we add up the size of CAIF header before
-	 * payload.
-	 */
-	cfpkt_info(pkt)->hdr_len = 1;
-	cfpkt_info(pkt)->dev_info = &service->dev_info;
-	ret = layr->dn->transmit(layr->dn, pkt);
-	if (ret < 0)
-		cfpkt_extr_head(pkt, &tmp, 1);
-	return ret;
+	return err;
 }
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 7aa1f03a0151..4c9f147c38af 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -159,6 +159,13 @@ void cfservl_destroy(struct cflayer *layer)
 	kfree(layer);
 }
 
+void cfsrvl_release(struct kref *kref)
+{
+	struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
+	pr_info("CAIF: %s(): enter\n", __func__);
+	kfree(service);
+}
+
 void cfsrvl_init(struct cfsrvl *service,
 			u8 channel_id,
 			struct dev_info *dev_info,
@@ -174,14 +181,10 @@ void cfsrvl_init(struct cfsrvl *service,
 	service->layer.modemcmd = cfservl_modemcmd;
 	service->dev_info = *dev_info;
 	service->supports_flowctrl = supports_flowctrl;
+	service->release = cfsrvl_release;
 	kref_init(&service->ref);
 }
 
-void cfsrvl_release(struct kref *kref)
-{
-	struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
-	kfree(service);
-}
 
 bool cfsrvl_ready(struct cfsrvl *service, int *err)
 {
-- 
cgit v1.2.3-55-g7522


From 2aa40aef9debc77d55cc87a50d335b6fe97fbeb0 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland
Date: Thu, 17 Jun 2010 06:55:40 +0000
Subject: caif: Use link layer MTU instead of fixed MTU

Previously CAIF supported maximum transfer size of ~4050.
The transfer size is now calculated dynamically based on the
link layers mtu size.

Signed-off-by: Sjur Braendeland@stericsson.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/caif/caif_serial.c |  1 -
 include/net/caif/caif_dev.h    |  8 +++--
 include/net/caif/caif_layer.h  |  6 ----
 include/net/caif/cfcnfg.h      | 16 +++++++---
 net/caif/caif_dev.c            | 12 +++++---
 net/caif/caif_socket.c         | 54 ++++++++++++++++++++++------------
 net/caif/cfcnfg.c              | 44 ++++++++++++++++++++++++---
 net/caif/cfctrl.c              |  6 ++--
 net/caif/cfdgml.c              |  5 ++++
 net/caif/cfpkt_skbuff.c        |  4 +--
 net/caif/cfserl.c              |  7 +++--
 net/caif/cfsrvl.c              |  1 -
 net/caif/cfutill.c             |  6 ----
 net/caif/cfveil.c              |  5 ----
 net/caif/chnl_net.c            | 67 +++++++++++++++++++++++++++++++++++-------
 15 files changed, 173 insertions(+), 69 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index 3e706f00a0d3..3df0c0f8b8bf 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -403,7 +403,6 @@ static void caifdev_setup(struct net_device *dev)
 	dev->type = ARPHRD_CAIF;
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu = CAIF_MAX_MTU;
-	dev->hard_header_len = CAIF_NEEDED_HEADROOM;
 	dev->tx_queue_len = 0;
 	dev->destructor = free_netdev;
 	skb_queue_head_init(&serdev->head);
diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h
index 318ab9478a44..6da573c75d54 100644
--- a/include/net/caif/caif_dev.h
+++ b/include/net/caif/caif_dev.h
@@ -50,6 +50,9 @@ struct caif_connect_request {
  * @client_layer:	User implementation of client layer. This layer
  *			MUST have receive and control callback functions
  *			implemented.
+ * @ifindex:		Link layer interface index used for this connection.
+ * @headroom:		Head room needed by CAIF protocol.
+ * @tailroom:		Tail room needed by CAIF protocol.
  *
  * This function connects a CAIF channel. The Client must implement
  * the struct cflayer. This layer represents the Client layer and holds
@@ -59,8 +62,9 @@ struct caif_connect_request {
  * E.g. CAIF Socket will call this function for each socket it connects
  * and have one client_layer instance for each socket.
  */
-int caif_connect_client(struct caif_connect_request *config,
-			   struct cflayer *client_layer);
+int caif_connect_client(struct caif_connect_request *conn_req,
+			struct cflayer *client_layer, int *ifindex,
+			int *headroom, int *tailroom);
 
 /**
  * caif_disconnect_client - Disconnects a client from the CAIF stack.
diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h
index 25c472f0e5b8..c8b07a904e78 100644
--- a/include/net/caif/caif_layer.h
+++ b/include/net/caif/caif_layer.h
@@ -15,14 +15,8 @@ struct cfpktq;
 struct caif_payload_info;
 struct caif_packet_funcs;
 
-#define CAIF_MAX_FRAMESIZE 4096
-#define CAIF_MAX_PAYLOAD_SIZE (4096 - 64)
-#define CAIF_NEEDED_HEADROOM (10)
-#define CAIF_NEEDED_TAILROOM (2)
 
 #define CAIF_LAYER_NAME_SZ 16
-#define CAIF_SUCCESS	1
-#define CAIF_FAILURE	0
 
 /**
  * caif_assert() - Assert function for CAIF.
diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h
index 9fc2fc20b884..bd646faffa47 100644
--- a/include/net/caif/cfcnfg.h
+++ b/include/net/caif/cfcnfg.h
@@ -7,6 +7,7 @@
 #ifndef CFCNFG_H_
 #define CFCNFG_H_
 #include <linux/spinlock.h>
+#include <linux/netdevice.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfctrl.h>
 
@@ -73,8 +74,8 @@ void cfcnfg_remove(struct cfcnfg *cfg);
 
 void
 cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
-		     void *dev, struct cflayer *phy_layer, u16 *phyid,
-		     enum cfcnfg_phy_preference pref,
+		     struct net_device *dev, struct cflayer *phy_layer,
+		     u16 *phyid, enum cfcnfg_phy_preference pref,
 		     bool fcs, bool stx);
 
 /**
@@ -114,11 +115,18 @@ void cfcnfg_release_adap_layer(struct cflayer *adap_layer);
  * @param:		Link setup parameters.
  * @adap_layer:		Specify the adaptation layer; the receive and
  *			flow-control functions MUST be set in the structure.
- *
+ * @ifindex:		Link layer interface index used for this connection.
+ * @proto_head:		Protocol head-space needed by CAIF protocol,
+ *			excluding link layer.
+ * @proto_tail:		Protocol tail-space needed by CAIF protocol,
+ *			excluding link layer.
  */
 int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 			    struct cfctrl_link_param *param,
-			    struct cflayer *adap_layer);
+			    struct cflayer *adap_layer,
+			    int *ifindex,
+			    int *proto_head,
+			    int *proto_tail);
 
 /**
  * cfcnfg_get_phyid() - Get physical ID, given type.
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index e2b86f1f5a47..0b586e9d1378 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -255,7 +255,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 			pref = CFPHYPREF_HIGH_BW;
 			break;
 		}
-
+		dev_hold(dev);
 		cfcnfg_add_phy_layer(get_caif_conf(),
 				     phy_type,
 				     dev,
@@ -285,6 +285,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 		caifd->layer.up->ctrlcmd(caifd->layer.up,
 					 _CAIF_CTRLCMD_PHYIF_DOWN_IND,
 					 caifd->layer.id);
+		might_sleep();
 		res = wait_event_interruptible_timeout(caifd->event,
 					atomic_read(&caifd->in_use) == 0,
 					TIMEOUT);
@@ -300,6 +301,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 				   "Unregistering an active CAIF device: %s\n",
 				   __func__, dev->name);
 		cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
+		dev_put(dev);
 		atomic_set(&caifd->state, what);
 		break;
 
@@ -326,7 +328,8 @@ struct cfcnfg *get_caif_conf(void)
 EXPORT_SYMBOL(get_caif_conf);
 
 int caif_connect_client(struct caif_connect_request *conn_req,
-			   struct cflayer *client_layer)
+			struct cflayer *client_layer, int *ifindex,
+			int *headroom, int *tailroom)
 {
 	struct cfctrl_link_param param;
 	int ret;
@@ -334,8 +337,9 @@ int caif_connect_client(struct caif_connect_request *conn_req,
 	if (ret)
 		return ret;
 	/* Hook up the adaptation layer. */
-	return cfcnfg_add_adaptation_layer(get_caif_conf(),
-						&param, client_layer);
+	return cfcnfg_add_adaptation_layer(get_caif_conf(), &param,
+					client_layer, ifindex,
+					headroom, tailroom);
 }
 EXPORT_SYMBOL(caif_connect_client);
 
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 848ae755cdd6..8ce904786116 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -28,8 +28,8 @@
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(AF_CAIF);
 
-#define CAIF_DEF_SNDBUF (CAIF_MAX_PAYLOAD_SIZE*10)
-#define CAIF_DEF_RCVBUF (CAIF_MAX_PAYLOAD_SIZE*100)
+#define CAIF_DEF_SNDBUF (4096*10)
+#define CAIF_DEF_RCVBUF (4096*100)
 
 /*
  * CAIF state is re-using the TCP socket states.
@@ -76,6 +76,7 @@ struct caifsock {
 	struct caif_connect_request conn_req;
 	struct mutex readlock;
 	struct dentry *debugfs_socket_dir;
+	int headroom, tailroom, maxframe;
 };
 
 static int rx_flow_is_on(struct caifsock *cf_sk)
@@ -594,23 +595,32 @@ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto err;
 	noblock = msg->msg_flags & MSG_DONTWAIT;
 
-	buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM;
-
 	timeo = sock_sndtimeo(sk, noblock);
 	timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk),
 				1, timeo, &ret);
 
+	if (ret)
+		goto err;
 	ret = -EPIPE;
 	if (cf_sk->sk.sk_state != CAIF_CONNECTED ||
 		sock_flag(sk, SOCK_DEAD) ||
 		(sk->sk_shutdown & RCV_SHUTDOWN))
 		goto err;
 
+	/* Error if trying to write more than maximum frame size. */
+	ret = -EMSGSIZE;
+	if (len > cf_sk->maxframe && cf_sk->sk.sk_protocol != CAIFPROTO_RFM)
+		goto err;
+
+	buffer_size = len + cf_sk->headroom + cf_sk->tailroom;
+
 	ret = -ENOMEM;
 	skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret);
-	if (!skb)
+
+	if (!skb || skb_tailroom(skb) < buffer_size)
 		goto err;
-	skb_reserve(skb, CAIF_NEEDED_HEADROOM);
+
+	skb_reserve(skb, cf_sk->headroom);
 
 	ret = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
 
@@ -641,7 +651,6 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	long timeo;
 
 	err = -EOPNOTSUPP;
-
 	if (unlikely(msg->msg_flags&MSG_OOB))
 		goto out_err;
 
@@ -658,8 +667,8 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 		size = len-sent;
 
-		if (size > CAIF_MAX_PAYLOAD_SIZE)
-			size = CAIF_MAX_PAYLOAD_SIZE;
+		if (size > cf_sk->maxframe)
+			size = cf_sk->maxframe;
 
 		/* If size is more than half of sndbuf, chop up message */
 		if (size > ((sk->sk_sndbuf >> 1) - 64))
@@ -669,14 +678,14 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 			size = SKB_MAX_ALLOC;
 
 		skb = sock_alloc_send_skb(sk,
-					size + CAIF_NEEDED_HEADROOM
-					+ CAIF_NEEDED_TAILROOM,
+					size + cf_sk->headroom +
+					cf_sk->tailroom,
 					msg->msg_flags&MSG_DONTWAIT,
 					&err);
 		if (skb == NULL)
 			goto out_err;
 
-		skb_reserve(skb, CAIF_NEEDED_HEADROOM);
+		skb_reserve(skb, cf_sk->headroom);
 		/*
 		 *	If you pass two values to the sock_alloc_send_skb
 		 *	it tries to grab the large buffer with GFP_NOFS
@@ -817,17 +826,15 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 	long timeo;
 	int err;
+	int ifindex, headroom, tailroom;
+	struct net_device *dev;
+
 	lock_sock(sk);
 
 	err = -EAFNOSUPPORT;
 	if (uaddr->sa_family != AF_CAIF)
 		goto out;
 
-	err = -ESOCKTNOSUPPORT;
-	if (unlikely(!(sk->sk_type == SOCK_STREAM &&
-		       cf_sk->sk.sk_protocol == CAIFPROTO_AT) &&
-		       sk->sk_type != SOCK_SEQPACKET))
-		goto out;
 	switch (sock->state) {
 	case SS_UNCONNECTED:
 		/* Normal case, a fresh connect */
@@ -883,12 +890,23 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	dbfs_atomic_inc(&cnt.num_connect_req);
 	cf_sk->layer.receive = caif_sktrecv_cb;
 	err = caif_connect_client(&cf_sk->conn_req,
-				&cf_sk->layer);
+				&cf_sk->layer, &ifindex, &headroom, &tailroom);
 	if (err < 0) {
 		cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
 		cf_sk->sk.sk_state = CAIF_DISCONNECTED;
 		goto out;
 	}
+	dev = dev_get_by_index(sock_net(sk), ifindex);
+	cf_sk->headroom = LL_RESERVED_SPACE_EXTRA(dev, headroom);
+	cf_sk->tailroom = tailroom;
+	cf_sk->maxframe = dev->mtu - (headroom + tailroom);
+	dev_put(dev);
+	if (cf_sk->maxframe < 1) {
+		pr_warning("CAIF: %s(): CAIF Interface MTU too small (%d)\n",
+			__func__, dev->mtu);
+		err = -ENODEV;
+		goto out;
+	}
 
 	err = -EINPROGRESS;
 wait_connect:
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index cff2dcb9efe4..1c29189b344d 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
+#include <linux/netdevice.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfpkt.h>
 #include <net/caif/cfcnfg.h>
@@ -42,6 +43,15 @@ struct cfcnfg_phyinfo {
 
 	/* Information about the physical device */
 	struct dev_info dev_info;
+
+	/* Interface index */
+	int ifindex;
+
+	/* Use Start of frame extension */
+	bool use_stx;
+
+	/* Use Start of frame checksum */
+	bool use_fcs;
 };
 
 struct cfcnfg {
@@ -249,9 +259,20 @@ static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id)
 {
 }
 
+int protohead[CFCTRL_SRV_MASK] = {
+	[CFCTRL_SRV_VEI] = 4,
+	[CFCTRL_SRV_DATAGRAM] = 7,
+	[CFCTRL_SRV_UTIL] = 4,
+	[CFCTRL_SRV_RFM] = 3,
+	[CFCTRL_SRV_DBG] = 3,
+};
+
 int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 				struct cfctrl_link_param *param,
-				struct cflayer *adap_layer)
+				struct cflayer *adap_layer,
+				int *ifindex,
+				int *proto_head,
+				int *proto_tail)
 {
 	struct cflayer *frml;
 	if (adap_layer == NULL) {
@@ -277,6 +298,14 @@ int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 		     param->phyid);
 	caif_assert(cnfg->phy_layers[param->phyid].phy_layer->id ==
 		     param->phyid);
+
+	*ifindex = cnfg->phy_layers[param->phyid].ifindex;
+	*proto_head =
+		protohead[param->linktype]+
+		(cnfg->phy_layers[param->phyid].use_stx ? 1 : 0);
+
+	*proto_tail = 2;
+
 	/* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */
 	cfctrl_enum_req(cnfg->ctrl, param->phyid);
 	return cfctrl_linkup_request(cnfg->ctrl, param, adap_layer);
@@ -298,6 +327,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 	struct cfcnfg *cnfg = container_obj(layer);
 	struct cflayer *servicel = NULL;
 	struct cfcnfg_phyinfo *phyinfo;
+	struct net_device *netdev;
+
 	if (adapt_layer == NULL) {
 		pr_debug("CAIF: %s(): link setup response "
 				"but no client exist, send linkdown back\n",
@@ -329,8 +360,9 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 		servicel = cfdgml_create(channel_id, &phyinfo->dev_info);
 		break;
 	case CFCTRL_SRV_RFM:
+		netdev = phyinfo->dev_info.dev;
 		servicel = cfrfml_create(channel_id, &phyinfo->dev_info,
-						RFM_FRAGMENT_SIZE);
+						netdev->mtu);
 		break;
 	case CFCTRL_SRV_UTIL:
 		servicel = cfutill_create(channel_id, &phyinfo->dev_info);
@@ -361,8 +393,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 
 void
 cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
-		     void *dev, struct cflayer *phy_layer, u16 *phyid,
-		     enum cfcnfg_phy_preference pref,
+		     struct net_device *dev, struct cflayer *phy_layer,
+		     u16 *phyid, enum cfcnfg_phy_preference pref,
 		     bool fcs, bool stx)
 {
 	struct cflayer *frml;
@@ -416,6 +448,10 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
 	cnfg->phy_layers[*phyid].dev_info.dev = dev;
 	cnfg->phy_layers[*phyid].phy_layer = phy_layer;
 	cnfg->phy_layers[*phyid].phy_ref_count = 0;
+	cnfg->phy_layers[*phyid].ifindex = dev->ifindex;
+	cnfg->phy_layers[*phyid].use_stx = stx;
+	cnfg->phy_layers[*phyid].use_fcs = fcs;
+
 	phy_layer->type = phy_type;
 	frml = cffrml_create(*phyid, fcs);
 	if (!frml) {
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 107c4b2a311e..563145fdc4c3 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -19,7 +19,7 @@
 #ifdef CAIF_NO_LOOP
 static int handle_loop(struct cfctrl *ctrl,
 			      int cmd, struct cfpkt *pkt){
-	return CAIF_FAILURE;
+	return -1;
 }
 #else
 static int handle_loop(struct cfctrl *ctrl,
@@ -395,7 +395,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 	cmd = cmdrsp & CFCTRL_CMD_MASK;
 	if (cmd != CFCTRL_CMD_LINK_ERR
 	    && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) {
-		if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE)
+		if (handle_loop(cfctrl, cmd, pkt) != 0)
 			cmdrsp |= CFCTRL_ERR_BIT;
 	}
 
@@ -647,6 +647,6 @@ found:
 	default:
 		break;
 	}
-	return CAIF_SUCCESS;
+	return 0;
 }
 #endif
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index 32d9f0dc8463..ed9d53aff280 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -17,6 +17,7 @@
 #define DGM_FLOW_OFF 0x81
 #define DGM_FLOW_ON  0x80
 #define DGM_CTRL_PKT_SIZE 1
+#define DGM_MTU 1500
 
 static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt);
@@ -89,6 +90,10 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
 	if (!cfsrvl_ready(service, &ret))
 		return ret;
 
+	/* STE Modem cannot handle more than 1500 bytes datagrams */
+	if (cfpkt_getlen(pkt) > DGM_MTU)
+		return -EMSGSIZE;
+
 	cfpkt_add_head(pkt, &zero, 4);
 
 	/* Add info for MUX-layer to route the packet out. */
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 318b0f4b416e..01f238ff2346 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -9,8 +9,8 @@
 #include <linux/hardirq.h>
 #include <net/caif/cfpkt.h>
 
-#define PKT_PREFIX CAIF_NEEDED_HEADROOM
-#define PKT_POSTFIX CAIF_NEEDED_TAILROOM
+#define PKT_PREFIX  16
+#define PKT_POSTFIX 2
 #define PKT_LEN_WHEN_EXTENDING 128
 #define PKT_ERROR(pkt, errmsg) do {	   \
     cfpkt_priv(pkt)->erronous = true;	   \
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 965c5baace40..a11fbd68a13d 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -14,7 +14,8 @@
 #define container_obj(layr) ((struct cfserl *) layr)
 
 #define CFSERL_STX 0x02
-#define CAIF_MINIUM_PACKET_SIZE 4
+#define SERIAL_MINIUM_PACKET_SIZE 4
+#define SERIAL_MAX_FRAMESIZE 4096
 struct cfserl {
 	struct cflayer layer;
 	struct cfpkt *incomplete_frm;
@@ -119,8 +120,8 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
 		/*
 		 * Frame error handling
 		 */
-		if (expectlen < CAIF_MINIUM_PACKET_SIZE
-		    || expectlen > CAIF_MAX_FRAMESIZE) {
+		if (expectlen < SERIAL_MINIUM_PACKET_SIZE
+		    || expectlen > SERIAL_MAX_FRAMESIZE) {
 			if (!layr->usestx) {
 				if (pkt != NULL)
 					cfpkt_destroy(pkt);
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 4c9f147c38af..f40939a91211 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -162,7 +162,6 @@ void cfservl_destroy(struct cflayer *layer)
 void cfsrvl_release(struct kref *kref)
 {
 	struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
-	pr_info("CAIF: %s(): enter\n", __func__);
 	kfree(service);
 }
 
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index ce525cac9064..02795aff57a4 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -90,12 +90,6 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt)
 	if (!cfsrvl_ready(service, &ret))
 		return ret;
 
-	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
-		pr_err("CAIF: %s(): packet too large size=%d\n",
-			__func__, cfpkt_getlen(pkt));
-		return -EOVERFLOW;
-	}
-
 	cfpkt_add_head(pkt, &zero, 1);
 	/* Add info for MUX-layer to route the packet out. */
 	info = cfpkt_info(pkt);
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 637cb0eee13c..77cc09faac9a 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -84,11 +84,6 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
 		return ret;
 	caif_assert(layr->dn != NULL);
 	caif_assert(layr->dn->transmit != NULL);
-	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
-		pr_warning("CAIF: %s(): Packet too large - size=%d\n",
-			   __func__, cfpkt_getlen(pkt));
-		return -EOVERFLOW;
-	}
 
 	if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
 		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 610966abe2dc..4293e190ec53 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -23,7 +23,7 @@
 #include <net/caif/caif_dev.h>
 
 /* GPRS PDP connection has MTU to 1500 */
-#define SIZE_MTU 1500
+#define GPRS_PDP_MTU 1500
 /* 5 sec. connect timeout */
 #define CONNECT_TIMEOUT (5 * HZ)
 #define CAIF_NET_DEFAULT_QUEUE_LEN 500
@@ -232,6 +232,8 @@ static int chnl_net_open(struct net_device *dev)
 {
 	struct chnl_net *priv = NULL;
 	int result = -1;
+	int llifindex, headroom, tailroom, mtu;
+	struct net_device *lldev;
 	ASSERT_RTNL();
 	priv = netdev_priv(dev);
 	if (!priv) {
@@ -241,41 +243,88 @@ static int chnl_net_open(struct net_device *dev)
 
 	if (priv->state != CAIF_CONNECTING) {
 		priv->state = CAIF_CONNECTING;
-		result = caif_connect_client(&priv->conn_req, &priv->chnl);
+		result = caif_connect_client(&priv->conn_req, &priv->chnl,
+					&llifindex, &headroom, &tailroom);
 		if (result != 0) {
-				priv->state = CAIF_DISCONNECTED;
 				pr_debug("CAIF: %s(): err: "
 					"Unable to register and open device,"
 					" Err:%d\n",
 					__func__,
 					result);
-				return result;
+				goto error;
+		}
+
+		lldev = dev_get_by_index(dev_net(dev), llifindex);
+
+		if (lldev == NULL) {
+			pr_debug("CAIF: %s(): no interface?\n", __func__);
+			result = -ENODEV;
+			goto error;
+		}
+
+		dev->needed_tailroom = tailroom + lldev->needed_tailroom;
+		dev->hard_header_len = headroom + lldev->hard_header_len +
+			lldev->needed_tailroom;
+
+		/*
+		 * MTU, head-room etc is not know before we have a
+		 * CAIF link layer device available. MTU calculation may
+		 * override initial RTNL configuration.
+		 * MTU is minimum of current mtu, link layer mtu pluss
+		 * CAIF head and tail, and PDP GPRS contexts max MTU.
+		 */
+		mtu = min_t(int, dev->mtu, lldev->mtu - (headroom + tailroom));
+		mtu = min_t(int, GPRS_PDP_MTU, mtu);
+		dev_set_mtu(dev, mtu);
+		dev_put(lldev);
+
+		if (mtu < 100) {
+			pr_warning("CAIF: %s(): "
+				"CAIF Interface MTU too small (%d)\n",
+				__func__, mtu);
+			result = -ENODEV;
+			goto error;
 		}
 	}
 
+	rtnl_unlock();  /* Release RTNL lock during connect wait */
+
 	result = wait_event_interruptible_timeout(priv->netmgmt_wq,
 						priv->state != CAIF_CONNECTING,
 						CONNECT_TIMEOUT);
 
+	rtnl_lock();
+
 	if (result == -ERESTARTSYS) {
 		pr_debug("CAIF: %s(): wait_event_interruptible"
 			 " woken by a signal\n", __func__);
-		return -ERESTARTSYS;
+		result = -ERESTARTSYS;
+		goto error;
 	}
+
 	if (result == 0) {
 		pr_debug("CAIF: %s(): connect timeout\n", __func__);
 		caif_disconnect_client(&priv->chnl);
 		priv->state = CAIF_DISCONNECTED;
 		pr_debug("CAIF: %s(): state disconnected\n", __func__);
-		return -ETIMEDOUT;
+		result = -ETIMEDOUT;
+		goto error;
 	}
 
 	if (priv->state != CAIF_CONNECTED) {
 		pr_debug("CAIF: %s(): connect failed\n", __func__);
-		return -ECONNREFUSED;
+		result = -ECONNREFUSED;
+		goto error;
 	}
 	pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__);
 	return 0;
+
+error:
+	caif_disconnect_client(&priv->chnl);
+	priv->state = CAIF_DISCONNECTED;
+	pr_debug("CAIF: %s(): state disconnected\n", __func__);
+	return result;
+
 }
 
 static int chnl_net_stop(struct net_device *dev)
@@ -321,9 +370,7 @@ static void ipcaif_net_setup(struct net_device *dev)
 	dev->destructor = free_netdev;
 	dev->flags |= IFF_NOARP;
 	dev->flags |= IFF_POINTOPOINT;
-	dev->needed_headroom = CAIF_NEEDED_HEADROOM;
-	dev->needed_tailroom = CAIF_NEEDED_TAILROOM;
-	dev->mtu = SIZE_MTU;
+	dev->mtu = GPRS_PDP_MTU;
 	dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN;
 
 	priv = netdev_priv(dev);
-- 
cgit v1.2.3-55-g7522


From 8f1c14b2e3b1805d3e9e6a306d07f5371ea703a7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 23 Jun 2010 00:32:03 +0000
Subject: snmp: fix SNMP_ADD_STATS()

commit aa2ea0586d9d (tcp: fix outsegs stat for TSO segments) incorrectly
assumed SNMP_ADD_STATS() was used from BH context.

Fix this using mib[!in_softirq()] instead of mib[0]

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/snmp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/snmp.h b/include/net/snmp.h
index 92456f1035f5..899003d18db9 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -134,7 +134,7 @@ struct linux_xfrm_mib {
 #define SNMP_ADD_STATS_USER(mib, field, addend)	\
 			this_cpu_add(mib[1]->mibs[field], addend)
 #define SNMP_ADD_STATS(mib, field, addend)	\
-			this_cpu_add(mib[0]->mibs[field], addend)
+			this_cpu_add(mib[!in_softirq()]->mibs[field], addend)
 /*
  * Use "__typeof__(*mib[0]) *ptr" instead of "__typeof__(mib[0]) ptr"
  * to make @ptr a non-percpu pointer.
-- 
cgit v1.2.3-55-g7522


From 1dc8d8c06d4002be4d1373fc06f25cd589be47e1 Mon Sep 17 00:00:00 2001
From: Justin P. Mattock
Date: Mon, 21 Jun 2010 13:45:42 -0700
Subject: net: Fix a typo in netlink.h

Fix a typo in include/net/netlink.h
should be finalize instead of finanlize

Signed-off-by: Justin P. Mattock <justinmattock@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 4fc05b58503e..f3b201d335b3 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -35,7 +35,7 @@
  *   nlmsg_new()			create a new netlink message
  *   nlmsg_put()			add a netlink message to an skb
  *   nlmsg_put_answer()			callback based nlmsg_put()
- *   nlmsg_end()			finanlize netlink message
+ *   nlmsg_end()			finalize netlink message
  *   nlmsg_get_pos()			return current position in message
  *   nlmsg_trim()			trim part of message
  *   nlmsg_cancel()			cancel message construction
-- 
cgit v1.2.3-55-g7522


From 7b2ff18ee7b0ec4bc3162f821e221781aaca48bd Mon Sep 17 00:00:00 2001
From: Jiri Olsa
Date: Tue, 15 Jun 2010 01:07:31 +0000
Subject: net - IP_NODEFRAG option for IPv4 socket

this patch is implementing IP_NODEFRAG option for IPv4 socket.
The reason is, there's no other way to send out the packet with user
customized header of the reassembly part.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h                  | 1 +
 include/net/inet_sock.h             | 3 ++-
 net/ipv4/af_inet.c                  | 2 ++
 net/ipv4/ip_sockglue.c              | 9 ++++++++-
 net/ipv4/netfilter/nf_defrag_ipv4.c | 5 +++++
 5 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/in.h b/include/linux/in.h
index 583c76f9c30f..41d88a4689af 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -85,6 +85,7 @@ struct in_addr {
 #define IP_RECVORIGDSTADDR   IP_ORIGDSTADDR
 
 #define IP_MINTTL       21
+#define IP_NODEFRAG     22
 
 /* IP_MTU_DISCOVER values */
 #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1653de515cee..1989cfd7405f 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -137,7 +137,8 @@ struct inet_sock {
 				hdrincl:1,
 				mc_loop:1,
 				transparent:1,
-				mc_all:1;
+				mc_all:1,
+				nodefrag:1;
 	int			mc_index;
 	__be32			mc_addr;
 	struct ip_mc_socklist	*mc_list;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d99e7e020189..b4c0969137cb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -355,6 +355,8 @@ lookup_protocol:
 	inet = inet_sk(sk);
 	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 
+	inet->nodefrag = 0;
+
 	if (SOCK_RAW == sock->type) {
 		inet->inet_num = protocol;
 		if (IPPROTO_RAW == protocol)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 47fff528ff39..6c40a8c46e79 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -465,7 +465,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			     (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
 			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
 			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
-			     (1<<IP_MINTTL))) ||
+			     (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
 	    optname == IP_MULTICAST_TTL ||
 	    optname == IP_MULTICAST_ALL ||
 	    optname == IP_MULTICAST_LOOP ||
@@ -588,6 +588,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		}
 		inet->hdrincl = val ? 1 : 0;
 		break;
+	case IP_NODEFRAG:
+		if (sk->sk_type != SOCK_RAW) {
+			err = -ENOPROTOOPT;
+			break;
+		}
+		inet->nodefrag = val ? 1 : 0;
+		break;
 	case IP_MTU_DISCOVER:
 		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
 			goto e_inval;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index cb763ae9ed90..eab8de32f200 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -66,6 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 					  const struct net_device *out,
 					  int (*okfn)(struct sk_buff *))
 {
+	struct inet_sock *inet = inet_sk(skb->sk);
+
+	if (inet && inet->nodefrag)
+		return NF_ACCEPT;
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
 	/* Previously seen (loopback)?  Ignore.  Do this before
-- 
cgit v1.2.3-55-g7522


From a8756201ba4189bca3ee1a6ec4e290f467ee09ab Mon Sep 17 00:00:00 2001
From: Tim Gardner
Date: Fri, 25 Jun 2010 14:44:07 +0200
Subject: netfilter: xt_connbytes: Force CT accounting to be enabled

Check at rule install time that CT accounting is enabled. Force it
to be enabled if not while also emitting a warning since this is not
the default state.

This is in preparation for deprecating CONFIG_NF_CT_ACCT upon which
CONFIG_NETFILTER_XT_MATCH_CONNBYTES depended being set.

Added 2 CT accounting support functions:

nf_ct_acct_enabled() - Get CT accounting state.
nf_ct_set_acct() - Enable/disable CT accountuing.

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Acked-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_acct.h | 12 ++++++++++++
 net/netfilter/xt_connbytes.c              | 10 ++++++++++
 2 files changed, 22 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 03e218f0be43..4e9c63a20db2 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -45,6 +45,18 @@ struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
 extern unsigned int
 seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir);
 
+/* Check if connection tracking accounting is enabled */
+static inline bool nf_ct_acct_enabled(struct net *net)
+{
+	return net->ct.sysctl_acct != 0;
+}
+
+/* Enable/disable connection tracking accounting */
+static inline void nf_ct_set_acct(struct net *net, bool enable)
+{
+	net->ct.sysctl_acct = enable;
+}
+
 extern int nf_conntrack_acct_init(struct net *net);
 extern void nf_conntrack_acct_fini(struct net *net);
 
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 73517835303d..5b138506690e 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -112,6 +112,16 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
 	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
+
+	/*
+	 * This filter cannot function correctly unless connection tracking
+	 * accounting is enabled, so complain in the hope that someone notices.
+	 */
+	if (!nf_ct_acct_enabled(par->net)) {
+		pr_warning("Forcing CT accounting to be enabled\n");
+		nf_ct_set_acct(par->net, true);
+	}
+
 	return ret;
 }
 
-- 
cgit v1.2.3-55-g7522


From 1823e4c80eeae2a774c75569ce3035070e5ee009 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 22 Jun 2010 20:58:41 +0000
Subject: snmp: add align parameter to snmp_mib_init()

In preparation for 64bit snmp counters for some mibs,
add an 'align' parameter to snmp_mib_init(), instead
of assuming mibs only contain 'unsigned long' fields.

Callers can use __alignof__(type) to provide correct
alignment.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
CC: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
CC: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       |  2 +-
 net/dccp/proto.c       |  3 ++-
 net/ipv4/af_inet.c     | 27 +++++++++++++++++----------
 net/ipv6/addrconf.c    |  9 ++++++---
 net/ipv6/af_inet6.c    | 15 ++++++++++-----
 net/sctp/protocol.c    |  3 ++-
 net/xfrm/xfrm_policy.c |  3 ++-
 7 files changed, 40 insertions(+), 22 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index d52f01180361..3b524df7dddb 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -178,7 +178,7 @@ extern struct ipv4_config ipv4_config;
 #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
 extern unsigned long snmp_fold_field(void __percpu *mib[], int offt);
-extern int snmp_mib_init(void __percpu *ptr[2], size_t mibsize);
+extern int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align);
 extern void snmp_mib_free(void __percpu *ptr[2]);
 
 extern struct local_ports {
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index f79bcef5088f..096250d1323b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1002,7 +1002,8 @@ EXPORT_SYMBOL_GPL(dccp_shutdown);
 static inline int dccp_mib_init(void)
 {
 	return snmp_mib_init((void __percpu **)dccp_statistics,
-			     sizeof(struct dccp_mib));
+			     sizeof(struct dccp_mib),
+			     __alignof__(struct dccp_mib));
 }
 
 static inline void dccp_mib_exit(void)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b4c0969137cb..640db9b90330 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1427,13 +1427,13 @@ unsigned long snmp_fold_field(void __percpu *mib[], int offt)
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
-int snmp_mib_init(void __percpu *ptr[2], size_t mibsize)
+int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
 {
 	BUG_ON(ptr == NULL);
-	ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long));
+	ptr[0] = __alloc_percpu(mibsize, align);
 	if (!ptr[0])
 		goto err0;
-	ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long));
+	ptr[1] = __alloc_percpu(mibsize, align);
 	if (!ptr[1])
 		goto err1;
 	return 0;
@@ -1490,25 +1490,32 @@ static const struct net_protocol icmp_protocol = {
 static __net_init int ipv4_mib_init_net(struct net *net)
 {
 	if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
-			  sizeof(struct tcp_mib)) < 0)
+			  sizeof(struct tcp_mib),
+			  __alignof__(struct tcp_mib)) < 0)
 		goto err_tcp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
-			  sizeof(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
-			  sizeof(struct linux_mib)) < 0)
+			  sizeof(struct linux_mib),
+			  __alignof__(struct linux_mib)) < 0)
 		goto err_net_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
-			  sizeof(struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
-			  sizeof(struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
-			  sizeof(struct icmp_mib)) < 0)
+			  sizeof(struct icmp_mib),
+			  __alignof__(struct icmp_mib)) < 0)
 		goto err_icmp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
-			  sizeof(struct icmpmsg_mib)) < 0)
+			  sizeof(struct icmpmsg_mib),
+			  __alignof__(struct icmpmsg_mib)) < 0)
 		goto err_icmpmsg_mib;
 
 	tcp_mib_init(net);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b97bb1f30808..c20a7c260a8f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -284,13 +284,16 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
 static int snmp6_alloc_dev(struct inet6_dev *idev)
 {
 	if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
-			  sizeof(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip;
 	if (snmp_mib_init((void __percpu **)idev->stats.icmpv6,
-			  sizeof(struct icmpv6_mib)) < 0)
+			  sizeof(struct icmpv6_mib),
+			  __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmp;
 	if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg,
-			  sizeof(struct icmpv6msg_mib)) < 0)
+			  sizeof(struct icmpv6msg_mib),
+			  __alignof__(struct icmpv6msg_mib)) < 0)
 		goto err_icmpmsg;
 
 	return 0;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 94b1b9c954bf..e830cd4f9d0f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -971,19 +971,24 @@ static void ipv6_packet_cleanup(void)
 static int __net_init ipv6_init_mibs(struct net *net)
 {
 	if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
-			  sizeof (struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		return -ENOMEM;
 	if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6,
-			  sizeof (struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics,
-			  sizeof(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
-			  sizeof(struct icmpv6_mib)) < 0)
+			  sizeof(struct icmpv6_mib),
+			  __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics,
-			  sizeof(struct icmpv6msg_mib)) < 0)
+			  sizeof(struct icmpv6msg_mib),
+			  __alignof__(struct icmpv6msg_mib)) < 0)
 		goto err_icmpmsg_mib;
 	return 0;
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index a0e1a7fdebbf..c0e162aeb0bd 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1002,7 +1002,8 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family)
 static inline int init_sctp_mibs(void)
 {
 	return snmp_mib_init((void __percpu **)sctp_statistics,
-			     sizeof(struct sctp_mib));
+			     sizeof(struct sctp_mib),
+			     __alignof__(struct sctp_mib));
 }
 
 static inline void cleanup_sctp_mibs(void)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4bf27d901333..593c06be6b62 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2480,7 +2480,8 @@ static int __net_init xfrm_statistics_init(struct net *net)
 	int rv;
 
 	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
-			  sizeof(struct linux_xfrm_mib)) < 0)
+			  sizeof(struct linux_xfrm_mib),
+			  __alignof__(struct linux_xfrm_mib)) < 0)
 		return -ENOMEM;
 	rv = xfrm_proc_init(net);
 	if (rv < 0)
-- 
cgit v1.2.3-55-g7522


From 172d69e63c7f1e8300d0e1c1bbd8eb0f630faa15 Mon Sep 17 00:00:00 2001
From: Florian Westphal
Date: Mon, 21 Jun 2010 11:48:45 +0000
Subject: syncookies: add support for ECN

Allows use of ECN when syncookies are in effect by encoding ecn_ok
into the syn-ack tcp timestamp.

While at it, remove a uneeded #ifdef CONFIG_SYN_COOKIES.
With CONFIG_SYN_COOKIES=nm want_cookie is ifdef'd to 0 and gcc
removes the "if (0)".

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  2 +-
 net/ipv4/syncookies.c | 15 ++++++++++-----
 net/ipv4/tcp_ipv4.c   |  6 ++----
 net/ipv6/syncookies.c |  5 +++--
 net/ipv6/tcp_ipv6.c   |  2 +-
 5 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 18c246c9b009..c2f96c2cc89c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -464,7 +464,7 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 				     __u16 *mss);
 
 extern __u32 cookie_init_timestamp(struct request_sock *req);
-extern bool cookie_check_timestamp(struct tcp_options_received *tcp_opt);
+extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);
 
 /* From net/ipv6/syncookies.c */
 extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 8896329aebd0..650cace2180d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -19,7 +19,7 @@
 #include <net/route.h>
 
 /* Timestamps: lowest bits store TCP options */
-#define TSBITS 5
+#define TSBITS 6
 #define TSMASK (((__u32)1 << TSBITS) - 1)
 
 extern int sysctl_tcp_syncookies;
@@ -73,6 +73,7 @@ __u32 cookie_init_timestamp(struct request_sock *req)
 
 	options = ireq->wscale_ok ? ireq->snd_wscale : 0xf;
 	options |= ireq->sack_ok << 4;
+	options |= ireq->ecn_ok << 5;
 
 	ts = ts_now & ~TSMASK;
 	ts |= options;
@@ -226,11 +227,11 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  * This extracts these options from the timestamp echo.
  *
  * The lowest 4 bits store snd_wscale.
- * The next lsb is for sack_ok
+ * next 2 bits indicate SACK and ECN support.
  *
  * return false if we decode an option that should not be.
  */
-bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
+bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
 {
 	/* echoed timestamp, lowest bits contain options */
 	u32 options = tcp_opt->rcv_tsecr & TSMASK;
@@ -244,6 +245,9 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
 		return false;
 
 	tcp_opt->sack_ok = (options >> 4) & 0x1;
+	*ecn_ok = (options >> 5) & 1;
+	if (*ecn_ok && !sysctl_tcp_ecn)
+		return false;
 
 	if (tcp_opt->sack_ok && !sysctl_tcp_sack)
 		return false;
@@ -272,6 +276,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	int mss;
 	struct rtable *rt;
 	__u8 rcv_wscale;
+	bool ecn_ok;
 
 	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -288,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (!cookie_check_timestamp(&tcp_opt))
+	if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
 		goto out;
 
 	ret = NULL;
@@ -305,7 +310,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->rmt_port		= th->source;
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
-	ireq->ecn_ok		= 0;
+	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2e41e6f92968..8fa32f5ae2ce 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1328,14 +1328,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
 
-	if (!want_cookie)
+	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (want_cookie) {
-#ifdef CONFIG_SYN_COOKIES
-		req->cookie_ts = tmp_opt.tstamp_ok;
-#endif
 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+		req->cookie_ts = tmp_opt.tstamp_ok;
 	} else if (!isn) {
 		struct inet_peer *peer = NULL;
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 84d818cfae17..09fd34f0dbf2 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -164,6 +164,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	int mss;
 	struct dst_entry *dst;
 	__u8 rcv_wscale;
+	bool ecn_ok;
 
 	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -180,7 +181,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (!cookie_check_timestamp(&tcp_opt))
+	if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
 		goto out;
 
 	ret = NULL;
@@ -215,7 +216,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	req->expires = 0UL;
 	req->retrans = 0;
-	ireq->ecn_ok		= 0;
+	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f87534569366..5ebc27ecebdc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1269,7 +1269,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	treq = inet6_rsk(req);
 	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
-	if (!want_cookie)
+	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (!isn) {
-- 
cgit v1.2.3-55-g7522


From 210d6de78c5d7c785fc532556cea340e517955e1 Mon Sep 17 00:00:00 2001
From: Changli Gao
Date: Thu, 24 Jun 2010 16:25:12 +0000
Subject: act_mirred: don't clone skb when skb isn't shared

don't clone skb when skb isn't shared

When the tcf_action is TC_ACT_STOLEN, and the skb isn't shared, we don't need
to clone a new skb. As the skb will be freed after this function returns, we
can use it freely once we get a reference to it.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/sch_generic.h |   11 +++++++++--
 net/sched/act_mirred.c    |    6 +++---
 2 files changed, 12 insertions(+), 5 deletions(-)
Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 11 +++++++++--
 net/sched/act_mirred.c    |  6 +++---
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b35301b0c7b6..977ec06ed0c7 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -594,9 +594,16 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen)
 }
 
 #ifdef CONFIG_NET_CLS_ACT
-static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask)
+static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask,
+					    int action)
 {
-	struct sk_buff *n = skb_clone(skb, gfp_mask);
+	struct sk_buff *n;
+
+	if ((action == TC_ACT_STOLEN || action == TC_ACT_QUEUED) &&
+	    !skb_shared(skb))
+		n = skb_get(skb);
+	else
+		n = skb_clone(skb, gfp_mask);
 
 	if (n) {
 		n->tc_verd = SET_TC_VERD(n->tc_verd, 0);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c0b6863e3b87..2e9a7b91aa10 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -169,13 +169,13 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 		goto out;
 	}
 
-	skb2 = skb_act_clone(skb, GFP_ATOMIC);
+	at = G_TC_AT(skb->tc_verd);
+	skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action);
 	if (skb2 == NULL)
 		goto out;
 
 	m->tcf_bstats.bytes += qdisc_pkt_len(skb2);
 	m->tcf_bstats.packets++;
-	at = G_TC_AT(skb->tc_verd);
 	if (!(at & AT_EGRESS)) {
 		if (m->tcfm_ok_push)
 			skb_push(skb2, skb2->dev->hard_header_len);
@@ -185,8 +185,8 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 	if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
 		skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
 
-	skb2->dev = dev;
 	skb2->skb_iif = skb->dev->ifindex;
+	skb2->dev = dev;
 	dev_queue_xmit(skb2);
 	err = 0;
 
-- 
cgit v1.2.3-55-g7522


From 529d6dad5bc69de14cdd24831e2a14264e93daa4 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland
Date: Tue, 29 Jun 2010 00:08:21 -0700
Subject: caif-driver: Add CAIF-SPI Protocol driver.

This patch introduces the CAIF SPI Protocol Driver for
CAIF Link Layer.

This driver implements a platform driver to accommodate for a
platform specific SPI device. A general platform driver is not
possible as there are no SPI Slave side Kernel API defined.
A sample CAIF SPI Platform device can be found in
.../Documentation/networking/caif/spi_porting.txt

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/caif/spi_porting.txt | 208 +++++++
 drivers/net/caif/Kconfig                      |  19 +
 drivers/net/caif/Makefile                     |   4 +
 drivers/net/caif/caif_spi.c                   | 847 ++++++++++++++++++++++++++
 drivers/net/caif/caif_spi_slave.c             | 252 ++++++++
 include/net/caif/caif_spi.h                   | 153 +++++
 6 files changed, 1483 insertions(+)
 create mode 100644 Documentation/networking/caif/spi_porting.txt
 create mode 100644 drivers/net/caif/caif_spi.c
 create mode 100644 drivers/net/caif/caif_spi_slave.c
 create mode 100644 include/net/caif/caif_spi.h

(limited to 'include/net')

diff --git a/Documentation/networking/caif/spi_porting.txt b/Documentation/networking/caif/spi_porting.txt
new file mode 100644
index 000000000000..61d7c9247453
--- /dev/null
+++ b/Documentation/networking/caif/spi_porting.txt
@@ -0,0 +1,208 @@
+- CAIF SPI porting -
+
+- CAIF SPI basics:
+
+Running CAIF over SPI needs some extra setup, owing to the nature of SPI.
+Two extra GPIOs have been added in order to negotiate the transfers
+ between the master and the slave. The minimum requirement for running
+CAIF over SPI is a SPI slave chip and two GPIOs (more details below).
+Please note that running as a slave implies that you need to keep up
+with the master clock. An overrun or underrun event is fatal.
+
+- CAIF SPI framework:
+
+To make porting as easy as possible, the CAIF SPI has been divided in
+two parts. The first part (called the interface part) deals with all
+generic functionality such as length framing, SPI frame negotiation
+and SPI frame delivery and transmission. The other part is the CAIF
+SPI slave device part, which is the module that you have to write if
+you want to run SPI CAIF on a new hardware. This part takes care of
+the physical hardware, both with regard to SPI and to GPIOs.
+
+- Implementing a CAIF SPI device:
+
+	- Functionality provided by the CAIF SPI slave device:
+
+	In order to implement a SPI device you will, as a minimum,
+	need to implement the following
+	functions:
+
+	int (*init_xfer) (struct cfspi_xfer * xfer, struct cfspi_dev *dev):
+
+	This function is called by the CAIF SPI interface to give
+	you a chance to set up your hardware to be ready to receive
+	a stream of data from the master. The xfer structure contains
+	both physical and logical adresses, as well as the total length
+	of the transfer in both directions.The dev parameter can be used
+	to map to different CAIF SPI slave devices.
+
+	void (*sig_xfer) (bool xfer, struct cfspi_dev *dev):
+
+	This function is called by the CAIF SPI interface when the output
+	(SPI_INT) GPIO needs to change state. The boolean value of the xfer
+	variable indicates whether the GPIO should be asserted (HIGH) or
+	deasserted (LOW). The dev parameter can be used to map to different CAIF
+	SPI slave devices.
+
+	- Functionality provided by the CAIF SPI interface:
+
+	void (*ss_cb) (bool assert, struct cfspi_ifc *ifc);
+
+	This function is called by the CAIF SPI slave device in order to
+	signal a change of state of the input GPIO (SS) to the interface.
+	Only active edges are mandatory to be reported.
+	This function can be called from IRQ context (recommended in order
+	not to introduce latency). The ifc parameter should be the pointer
+	returned from the platform probe function in the SPI device structure.
+
+	void (*xfer_done_cb) (struct cfspi_ifc *ifc);
+
+	This function is called by the CAIF SPI slave device in order to
+	report that a transfer is completed. This function should only be
+	called once both the transmission and the reception are completed.
+	This function can be called from IRQ context (recommended in order
+	not to introduce latency). The ifc parameter should be the pointer
+	returned from the platform probe function in the SPI device structure.
+
+	- Connecting the bits and pieces:
+
+		- Filling in the SPI slave device structure:
+
+		Connect the necessary callback functions.
+		Indicate clock speed (used to calculate toggle delays).
+		Chose a suitable name (helps debugging if you use several CAIF
+		SPI slave devices).
+		Assign your private data (can be used to map to your structure).
+
+		- Filling in the SPI slave platform device structure:
+		Add name of driver to connect to ("cfspi_sspi").
+		Assign the SPI slave device structure as platform data.
+
+- Padding:
+
+In order to optimize throughput, a number of SPI padding options are provided.
+Padding can be enabled independently for uplink and downlink transfers.
+Padding can be enabled for the head, the tail and for the total frame size.
+The padding needs to be correctly configured on both sides of the link.
+The padding can be changed via module parameters in cfspi_sspi.c or via
+the sysfs directory of the cfspi_sspi driver (before device registration).
+
+- CAIF SPI device template:
+
+/*
+ *	Copyright (C) ST-Ericsson AB 2010
+ *	Author: Daniel Martensson / Daniel.Martensson@stericsson.com
+ *	License terms: GNU General Public License (GPL), version 2.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <net/caif/caif_spi.h>
+
+MODULE_LICENSE("GPL");
+
+struct sspi_struct {
+	struct cfspi_dev sdev;
+	struct cfspi_xfer *xfer;
+};
+
+static struct sspi_struct slave;
+static struct platform_device slave_device;
+
+static irqreturn_t sspi_irq(int irq, void *arg)
+{
+	/* You only need to trigger on an edge to the active state of the
+	 * SS signal. Once a edge is detected, the ss_cb() function should be
+	 * called with the parameter assert set to true. It is OK
+	 * (and even advised) to call the ss_cb() function in IRQ context in
+	 * order not to add any delay. */
+
+	return IRQ_HANDLED;
+}
+
+static void sspi_complete(void *context)
+{
+	/* Normally the DMA or the SPI framework will call you back
+	 * in something similar to this. The only thing you need to
+	 * do is to call the xfer_done_cb() function, providing the pointer
+	 * to the CAIF SPI interface. It is OK to call this function
+	 * from IRQ context. */
+}
+
+static int sspi_init_xfer(struct cfspi_xfer *xfer, struct cfspi_dev *dev)
+{
+	/* Store transfer info. For a normal implementation you should
+	 * set up your DMA here and make sure that you are ready to
+	 * receive the data from the master SPI. */
+
+	struct sspi_struct *sspi = (struct sspi_struct *)dev->priv;
+
+	sspi->xfer = xfer;
+
+	return 0;
+}
+
+void sspi_sig_xfer(bool xfer, struct cfspi_dev *dev)
+{
+	/* If xfer is true then you should assert the SPI_INT to indicate to
+	 * the master that you are ready to recieve the data from the master
+	 * SPI. If xfer is false then you should de-assert SPI_INT to indicate
+	 * that the transfer is done.
+	 */
+
+	struct sspi_struct *sspi = (struct sspi_struct *)dev->priv;
+}
+
+static void sspi_release(struct device *dev)
+{
+	/*
+	 * Here you should release your SPI device resources.
+	 */
+}
+
+static int __init sspi_init(void)
+{
+	/* Here you should initialize your SPI device by providing the
+	 * necessary functions, clock speed, name and private data. Once
+	 * done, you can register your device with the
+	 * platform_device_register() function. This function will return
+	 * with the CAIF SPI interface initialized. This is probably also
+	 * the place where you should set up your GPIOs, interrupts and SPI
+	 * resources. */
+
+	int res = 0;
+
+	/* Initialize slave device. */
+	slave.sdev.init_xfer = sspi_init_xfer;
+	slave.sdev.sig_xfer = sspi_sig_xfer;
+	slave.sdev.clk_mhz = 13;
+	slave.sdev.priv = &slave;
+	slave.sdev.name = "spi_sspi";
+	slave_device.dev.release = sspi_release;
+
+	/* Initialize platform device. */
+	slave_device.name = "cfspi_sspi";
+	slave_device.dev.platform_data = &slave.sdev;
+
+	/* Register platform device. */
+	res = platform_device_register(&slave_device);
+	if (res) {
+		printk(KERN_WARNING "sspi_init: failed to register dev.\n");
+		return -ENODEV;
+	}
+
+	return res;
+}
+
+static void __exit sspi_exit(void)
+{
+	platform_device_del(&slave_device);
+}
+
+module_init(sspi_init);
+module_exit(sspi_exit);
diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig
index 6f33ee453f41..631a6242b011 100644
--- a/drivers/net/caif/Kconfig
+++ b/drivers/net/caif/Kconfig
@@ -12,3 +12,22 @@ config CAIF_TTY
 	The CAIF TTY transport driver is a Line Discipline (ldisc)
 	identified as N_CAIF. When this ldisc is opened from user space
 	it will redirect the TTY's traffic into the CAIF stack.
+
+config CAIF_SPI_SLAVE
+	tristate "CAIF SPI transport driver for slave interface"
+	depends on CAIF
+	default n
+	---help---
+	The CAIF Link layer SPI Protocol driver for Slave SPI interface.
+	This driver implements a platform driver to accommodate for a
+	platform specific SPI device. A sample CAIF SPI Platform device is
+	provided in Documentation/networking/caif/spi_porting.txt
+
+config CAIF_SPI_SYNC
+	bool "Next command and length in start of frame"
+	depends on CAIF_SPI_SLAVE
+	default n
+	---help---
+	Putting the next command and length in the start of the frame can
+	help to synchronize to the next transfer in case of over or under-runs.
+	This option also needs to be enabled on the modem.
diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile
index e6d3ca06ecfe..3a11d619452b 100644
--- a/drivers/net/caif/Makefile
+++ b/drivers/net/caif/Makefile
@@ -4,3 +4,7 @@ endif
 
 # Serial interface
 obj-$(CONFIG_CAIF_TTY) += caif_serial.o
+
+# SPI slave physical interfaces module
+cfspi_slave-objs := caif_spi.o caif_spi_slave.o
+obj-$(CONFIG_CAIF_SPI_SLAVE) += cfspi_slave.o
diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c
new file mode 100644
index 000000000000..03049e86e8ae
--- /dev/null
+++ b/drivers/net/caif/caif_spi.c
@@ -0,0 +1,847 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com
+ * Author:  Daniel Martensson / Daniel.Martensson@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2.
+ */
+
+#include <linux/version.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/debugfs.h>
+#include <linux/if_arp.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/caif_spi.h>
+
+#ifndef CONFIG_CAIF_SPI_SYNC
+#define FLAVOR "Flavour: Vanilla.\n"
+#else
+#define FLAVOR "Flavour: Master CMD&LEN at start.\n"
+#endif /* CONFIG_CAIF_SPI_SYNC */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Daniel Martensson<daniel.martensson@stericsson.com>");
+MODULE_DESCRIPTION("CAIF SPI driver");
+
+static int spi_loop;
+module_param(spi_loop, bool, S_IRUGO);
+MODULE_PARM_DESC(spi_loop, "SPI running in loopback mode.");
+
+/* SPI frame alignment. */
+module_param(spi_frm_align, int, S_IRUGO);
+MODULE_PARM_DESC(spi_frm_align, "SPI frame alignment.");
+
+/* SPI padding options. */
+module_param(spi_up_head_align, int, S_IRUGO);
+MODULE_PARM_DESC(spi_up_head_align, "SPI uplink head alignment.");
+
+module_param(spi_up_tail_align, int, S_IRUGO);
+MODULE_PARM_DESC(spi_up_tail_align, "SPI uplink tail alignment.");
+
+module_param(spi_down_head_align, int, S_IRUGO);
+MODULE_PARM_DESC(spi_down_head_align, "SPI downlink head alignment.");
+
+module_param(spi_down_tail_align, int, S_IRUGO);
+MODULE_PARM_DESC(spi_down_tail_align, "SPI downlink tail alignment.");
+
+#ifdef CONFIG_ARM
+#define BYTE_HEX_FMT "%02X"
+#else
+#define BYTE_HEX_FMT "%02hhX"
+#endif
+
+#define SPI_MAX_PAYLOAD_SIZE 4096
+/*
+ * Threshold values for the SPI packet queue. Flowcontrol will be asserted
+ * when the number of packets exceeds HIGH_WATER_MARK. It will not be
+ * deasserted before the number of packets drops below LOW_WATER_MARK.
+ */
+#define LOW_WATER_MARK   100
+#define HIGH_WATER_MARK  (LOW_WATER_MARK*5)
+
+#ifdef CONFIG_UML
+
+/*
+ * We sometimes use UML for debugging, but it cannot handle
+ * dma_alloc_coherent so we have to wrap it.
+ */
+static inline void *dma_alloc(dma_addr_t *daddr)
+{
+	return kmalloc(SPI_DMA_BUF_LEN, GFP_KERNEL);
+}
+
+static inline void dma_free(void *cpu_addr, dma_addr_t handle)
+{
+	kfree(cpu_addr);
+}
+
+#else
+
+static inline void *dma_alloc(dma_addr_t *daddr)
+{
+	return dma_alloc_coherent(NULL, SPI_DMA_BUF_LEN, daddr,
+				GFP_KERNEL);
+}
+
+static inline void dma_free(void *cpu_addr, dma_addr_t handle)
+{
+	dma_free_coherent(NULL, SPI_DMA_BUF_LEN, cpu_addr, handle);
+}
+#endif	/* CONFIG_UML */
+
+#ifdef CONFIG_DEBUG_FS
+
+#define DEBUGFS_BUF_SIZE	4096
+
+static struct dentry *dbgfs_root;
+
+static inline void driver_debugfs_create(void)
+{
+	dbgfs_root = debugfs_create_dir(cfspi_spi_driver.driver.name, NULL);
+}
+
+static inline void driver_debugfs_remove(void)
+{
+	debugfs_remove(dbgfs_root);
+}
+
+static inline void dev_debugfs_rem(struct cfspi *cfspi)
+{
+	debugfs_remove(cfspi->dbgfs_frame);
+	debugfs_remove(cfspi->dbgfs_state);
+	debugfs_remove(cfspi->dbgfs_dir);
+}
+
+static int dbgfs_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t dbgfs_state(struct file *file, char __user *user_buf,
+			   size_t count, loff_t *ppos)
+{
+	char *buf;
+	int len = 0;
+	ssize_t size;
+	struct cfspi *cfspi = (struct cfspi *)file->private_data;
+
+	buf = kzalloc(DEBUGFS_BUF_SIZE, GFP_KERNEL);
+	if (!buf)
+		return 0;
+
+	/* Print out debug information. */
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"CAIF SPI debug information:\n");
+
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), FLAVOR);
+
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"STATE: %d\n", cfspi->dbg_state);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Previous CMD: 0x%x\n", cfspi->pcmd);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Current CMD: 0x%x\n", cfspi->cmd);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Previous TX len: %d\n", cfspi->tx_ppck_len);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Previous RX len: %d\n", cfspi->rx_ppck_len);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Current TX len: %d\n", cfspi->tx_cpck_len);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Current RX len: %d\n", cfspi->rx_cpck_len);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Next TX len: %d\n", cfspi->tx_npck_len);
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Next RX len: %d\n", cfspi->rx_npck_len);
+
+	size = simple_read_from_buffer(user_buf, count, ppos, buf, len);
+	kfree(buf);
+
+	return size;
+}
+
+static ssize_t print_frame(char *buf, size_t size, char *frm,
+			   size_t count, size_t cut)
+{
+	int len = 0;
+	int i;
+	for (i = 0; i < count; i++) {
+		len += snprintf((buf + len), (size - len),
+					"[0x" BYTE_HEX_FMT "]",
+					frm[i]);
+		if ((i == cut) && (count > (cut * 2))) {
+			/* Fast forward. */
+			i = count - cut;
+			len += snprintf((buf + len), (size - len),
+					"--- %u bytes skipped ---\n",
+					(int)(count - (cut * 2)));
+		}
+
+		if ((!(i % 10)) && i) {
+			len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+					"\n");
+		}
+	}
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), "\n");
+	return len;
+}
+
+static ssize_t dbgfs_frame(struct file *file, char __user *user_buf,
+			   size_t count, loff_t *ppos)
+{
+	char *buf;
+	int len = 0;
+	ssize_t size;
+	struct cfspi *cfspi;
+
+	cfspi = (struct cfspi *)file->private_data;
+	buf = kzalloc(DEBUGFS_BUF_SIZE, GFP_KERNEL);
+	if (!buf)
+		return 0;
+
+	/* Print out debug information. */
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Current frame:\n");
+
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Tx data (Len: %d):\n", cfspi->tx_cpck_len);
+
+	len += print_frame((buf + len), (DEBUGFS_BUF_SIZE - len),
+			   cfspi->xfer.va_tx,
+			   (cfspi->tx_cpck_len + SPI_CMD_SZ), 100);
+
+	len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len),
+			"Rx data (Len: %d):\n", cfspi->rx_cpck_len);
+
+	len += print_frame((buf + len), (DEBUGFS_BUF_SIZE - len),
+			   cfspi->xfer.va_rx,
+			   (cfspi->rx_cpck_len + SPI_CMD_SZ), 100);
+
+	size = simple_read_from_buffer(user_buf, count, ppos, buf, len);
+	kfree(buf);
+
+	return size;
+}
+
+static const struct file_operations dbgfs_state_fops = {
+	.open = dbgfs_open,
+	.read = dbgfs_state,
+	.owner = THIS_MODULE
+};
+
+static const struct file_operations dbgfs_frame_fops = {
+	.open = dbgfs_open,
+	.read = dbgfs_frame,
+	.owner = THIS_MODULE
+};
+
+static inline void dev_debugfs_add(struct cfspi *cfspi)
+{
+	cfspi->dbgfs_dir = debugfs_create_dir(cfspi->pdev->name, dbgfs_root);
+	cfspi->dbgfs_state = debugfs_create_file("state", S_IRUGO,
+						 cfspi->dbgfs_dir, cfspi,
+						 &dbgfs_state_fops);
+	cfspi->dbgfs_frame = debugfs_create_file("frame", S_IRUGO,
+						 cfspi->dbgfs_dir, cfspi,
+						 &dbgfs_frame_fops);
+}
+
+inline void cfspi_dbg_state(struct cfspi *cfspi, int state)
+{
+	cfspi->dbg_state = state;
+};
+#else
+
+static inline void driver_debugfs_create(void)
+{
+}
+
+static inline void driver_debugfs_remove(void)
+{
+}
+
+static inline void dev_debugfs_add(struct cfspi *cfspi)
+{
+}
+
+static inline void dev_debugfs_rem(struct cfspi *cfspi)
+{
+}
+
+inline void cfspi_dbg_state(struct cfspi *cfspi, int state)
+{
+}
+#endif				/* CONFIG_DEBUG_FS */
+
+static LIST_HEAD(cfspi_list);
+static spinlock_t cfspi_list_lock;
+
+/* SPI uplink head alignment. */
+static ssize_t show_up_head_align(struct device_driver *driver, char *buf)
+{
+	return sprintf(buf, "%d\n", spi_up_head_align);
+}
+
+static DRIVER_ATTR(up_head_align, S_IRUSR, show_up_head_align, NULL);
+
+/* SPI uplink tail alignment. */
+static ssize_t show_up_tail_align(struct device_driver *driver, char *buf)
+{
+	return sprintf(buf, "%d\n", spi_up_tail_align);
+}
+
+static DRIVER_ATTR(up_tail_align, S_IRUSR, show_up_tail_align, NULL);
+
+/* SPI downlink head alignment. */
+static ssize_t show_down_head_align(struct device_driver *driver, char *buf)
+{
+	return sprintf(buf, "%d\n", spi_down_head_align);
+}
+
+static DRIVER_ATTR(down_head_align, S_IRUSR, show_down_head_align, NULL);
+
+/* SPI downlink tail alignment. */
+static ssize_t show_down_tail_align(struct device_driver *driver, char *buf)
+{
+	return sprintf(buf, "%d\n", spi_down_tail_align);
+}
+
+static DRIVER_ATTR(down_tail_align, S_IRUSR, show_down_tail_align, NULL);
+
+/* SPI frame alignment. */
+static ssize_t show_frame_align(struct device_driver *driver, char *buf)
+{
+	return sprintf(buf, "%d\n", spi_frm_align);
+}
+
+static DRIVER_ATTR(frame_align, S_IRUSR, show_frame_align, NULL);
+
+int cfspi_xmitfrm(struct cfspi *cfspi, u8 *buf, size_t len)
+{
+	u8 *dst = buf;
+	caif_assert(buf);
+
+	do {
+		struct sk_buff *skb;
+		struct caif_payload_info *info;
+		int spad = 0;
+		int epad;
+
+		skb = skb_dequeue(&cfspi->chead);
+		if (!skb)
+			break;
+
+		/*
+		 * Calculate length of frame including SPI padding.
+		 * The payload position is found in the control buffer.
+		 */
+		info = (struct caif_payload_info *)&skb->cb;
+
+		/*
+		 * Compute head offset i.e. number of bytes to add to
+		 * get the start of the payload aligned.
+		 */
+		if (spi_up_head_align) {
+			spad = 1 + ((info->hdr_len + 1) & spi_up_head_align);
+			*dst = (u8)(spad - 1);
+			dst += spad;
+		}
+
+		/* Copy in CAIF frame. */
+		skb_copy_bits(skb, 0, dst, skb->len);
+		dst += skb->len;
+		cfspi->ndev->stats.tx_packets++;
+		cfspi->ndev->stats.tx_bytes += skb->len;
+
+		/*
+		 * Compute tail offset i.e. number of bytes to add to
+		 * get the complete CAIF frame aligned.
+		 */
+		epad = (skb->len + spad) & spi_up_tail_align;
+		dst += epad;
+
+		dev_kfree_skb(skb);
+
+	} while ((dst - buf) < len);
+
+	return dst - buf;
+}
+
+int cfspi_xmitlen(struct cfspi *cfspi)
+{
+	struct sk_buff *skb = NULL;
+	int frm_len = 0;
+	int pkts = 0;
+
+	/*
+	 * Decommit previously commited frames.
+	 * skb_queue_splice_tail(&cfspi->chead,&cfspi->qhead)
+	 */
+	while (skb_peek(&cfspi->chead)) {
+		skb = skb_dequeue_tail(&cfspi->chead);
+		skb_queue_head(&cfspi->qhead, skb);
+	}
+
+	do {
+		struct caif_payload_info *info = NULL;
+		int spad = 0;
+		int epad = 0;
+
+		skb = skb_dequeue(&cfspi->qhead);
+		if (!skb)
+			break;
+
+		/*
+		 * Calculate length of frame including SPI padding.
+		 * The payload position is found in the control buffer.
+		 */
+		info = (struct caif_payload_info *)&skb->cb;
+
+		/*
+		 * Compute head offset i.e. number of bytes to add to
+		 * get the start of the payload aligned.
+		 */
+		if (spi_up_head_align)
+			spad = 1 + ((info->hdr_len + 1) & spi_up_head_align);
+
+		/*
+		 * Compute tail offset i.e. number of bytes to add to
+		 * get the complete CAIF frame aligned.
+		 */
+		epad = (skb->len + spad) & spi_up_tail_align;
+
+		if ((skb->len + spad + epad + frm_len) <= CAIF_MAX_SPI_FRAME) {
+			skb_queue_tail(&cfspi->chead, skb);
+			pkts++;
+			frm_len += skb->len + spad + epad;
+		} else {
+			/* Put back packet. */
+			skb_queue_head(&cfspi->qhead, skb);
+		}
+	} while (pkts <= CAIF_MAX_SPI_PKTS);
+
+	/*
+	 * Send flow on if previously sent flow off
+	 * and now go below the low water mark
+	 */
+	if (cfspi->flow_off_sent && cfspi->qhead.qlen < cfspi->qd_low_mark &&
+		cfspi->cfdev.flowctrl) {
+		cfspi->flow_off_sent = 0;
+		cfspi->cfdev.flowctrl(cfspi->ndev, 1);
+	}
+
+	return frm_len;
+}
+
+static void cfspi_ss_cb(bool assert, struct cfspi_ifc *ifc)
+{
+	struct cfspi *cfspi = (struct cfspi *)ifc->priv;
+
+	if (!in_interrupt())
+		spin_lock(&cfspi->lock);
+	if (assert) {
+		set_bit(SPI_SS_ON, &cfspi->state);
+		set_bit(SPI_XFER, &cfspi->state);
+	} else {
+		set_bit(SPI_SS_OFF, &cfspi->state);
+	}
+	if (!in_interrupt())
+		spin_unlock(&cfspi->lock);
+
+	/* Wake up the xfer thread. */
+	wake_up_interruptible(&cfspi->wait);
+}
+
+static void cfspi_xfer_done_cb(struct cfspi_ifc *ifc)
+{
+	struct cfspi *cfspi = (struct cfspi *)ifc->priv;
+
+	/* Transfer done, complete work queue */
+	complete(&cfspi->comp);
+}
+
+static int cfspi_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct cfspi *cfspi = NULL;
+	unsigned long flags;
+	if (!dev)
+		return -EINVAL;
+
+	cfspi = netdev_priv(dev);
+
+	skb_queue_tail(&cfspi->qhead, skb);
+
+	spin_lock_irqsave(&cfspi->lock, flags);
+	if (!test_and_set_bit(SPI_XFER, &cfspi->state)) {
+		/* Wake up xfer thread. */
+		wake_up_interruptible(&cfspi->wait);
+	}
+	spin_unlock_irqrestore(&cfspi->lock, flags);
+
+	/* Send flow off if number of bytes is above high water mark */
+	if (!cfspi->flow_off_sent &&
+		cfspi->qhead.qlen > cfspi->qd_high_mark &&
+		cfspi->cfdev.flowctrl) {
+		cfspi->flow_off_sent = 1;
+		cfspi->cfdev.flowctrl(cfspi->ndev, 0);
+	}
+
+	return 0;
+}
+
+int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len)
+{
+	u8 *src = buf;
+
+	caif_assert(buf != NULL);
+
+	do {
+		int res;
+		struct sk_buff *skb = NULL;
+		int spad = 0;
+		int epad = 0;
+		u8 *dst = NULL;
+		int pkt_len = 0;
+
+		/*
+		 * Compute head offset i.e. number of bytes added to
+		 * get the start of the payload aligned.
+		 */
+		if (spi_down_head_align) {
+			spad = 1 + *src;
+			src += spad;
+		}
+
+		/* Read length of CAIF frame (little endian). */
+		pkt_len = *src;
+		pkt_len |= ((*(src+1)) << 8) & 0xFF00;
+		pkt_len += 2;	/* Add FCS fields. */
+
+		/* Get a suitable caif packet and copy in data. */
+
+		skb = netdev_alloc_skb(cfspi->ndev, pkt_len + 1);
+		caif_assert(skb != NULL);
+
+		dst = skb_put(skb, pkt_len);
+		memcpy(dst, src, pkt_len);
+		src += pkt_len;
+
+		skb->protocol = htons(ETH_P_CAIF);
+		skb_reset_mac_header(skb);
+		skb->dev = cfspi->ndev;
+
+		/*
+		 * Push received packet up the stack.
+		 */
+		if (!spi_loop)
+			res = netif_rx_ni(skb);
+		else
+			res = cfspi_xmit(skb, cfspi->ndev);
+
+		if (!res) {
+			cfspi->ndev->stats.rx_packets++;
+			cfspi->ndev->stats.rx_bytes += pkt_len;
+		} else
+			cfspi->ndev->stats.rx_dropped++;
+
+		/*
+		 * Compute tail offset i.e. number of bytes added to
+		 * get the complete CAIF frame aligned.
+		 */
+		epad = (pkt_len + spad) & spi_down_tail_align;
+		src += epad;
+	} while ((src - buf) < len);
+
+	return src - buf;
+}
+
+static int cfspi_open(struct net_device *dev)
+{
+	netif_wake_queue(dev);
+	return 0;
+}
+
+static int cfspi_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	return 0;
+}
+static const struct net_device_ops cfspi_ops = {
+	.ndo_open = cfspi_open,
+	.ndo_stop = cfspi_close,
+	.ndo_start_xmit = cfspi_xmit
+};
+
+static void cfspi_setup(struct net_device *dev)
+{
+	struct cfspi *cfspi = netdev_priv(dev);
+	dev->features = 0;
+	dev->netdev_ops = &cfspi_ops;
+	dev->type = ARPHRD_CAIF;
+	dev->flags = IFF_NOARP | IFF_POINTOPOINT;
+	dev->tx_queue_len = 0;
+	dev->mtu = SPI_MAX_PAYLOAD_SIZE;
+	dev->destructor = free_netdev;
+	skb_queue_head_init(&cfspi->qhead);
+	skb_queue_head_init(&cfspi->chead);
+	cfspi->cfdev.link_select = CAIF_LINK_HIGH_BANDW;
+	cfspi->cfdev.use_frag = false;
+	cfspi->cfdev.use_stx = false;
+	cfspi->cfdev.use_fcs = false;
+	cfspi->ndev = dev;
+}
+
+int cfspi_spi_probe(struct platform_device *pdev)
+{
+	struct cfspi *cfspi = NULL;
+	struct net_device *ndev;
+	struct cfspi_dev *dev;
+	int res;
+	dev = (struct cfspi_dev *)pdev->dev.platform_data;
+
+	ndev = alloc_netdev(sizeof(struct cfspi),
+			"cfspi%d", cfspi_setup);
+	if (!dev)
+		return -ENODEV;
+
+	cfspi = netdev_priv(ndev);
+	netif_stop_queue(ndev);
+	cfspi->ndev = ndev;
+	cfspi->pdev = pdev;
+
+	/* Set flow info */
+	cfspi->flow_off_sent = 0;
+	cfspi->qd_low_mark = LOW_WATER_MARK;
+	cfspi->qd_high_mark = HIGH_WATER_MARK;
+
+	/* Assign the SPI device. */
+	cfspi->dev = dev;
+	/* Assign the device ifc to this SPI interface. */
+	dev->ifc = &cfspi->ifc;
+
+	/* Allocate DMA buffers. */
+	cfspi->xfer.va_tx = dma_alloc(&cfspi->xfer.pa_tx);
+	if (!cfspi->xfer.va_tx) {
+		printk(KERN_WARNING
+		       "CFSPI: failed to allocate dma TX buffer.\n");
+		res = -ENODEV;
+		goto err_dma_alloc_tx;
+	}
+
+	cfspi->xfer.va_rx = dma_alloc(&cfspi->xfer.pa_rx);
+
+	if (!cfspi->xfer.va_rx) {
+		printk(KERN_WARNING
+		       "CFSPI: failed to allocate dma TX buffer.\n");
+		res = -ENODEV;
+		goto err_dma_alloc_rx;
+	}
+
+	/* Initialize the work queue. */
+	INIT_WORK(&cfspi->work, cfspi_xfer);
+
+	/* Initialize spin locks. */
+	spin_lock_init(&cfspi->lock);
+
+	/* Initialize flow control state. */
+	cfspi->flow_stop = false;
+
+	/* Initialize wait queue. */
+	init_waitqueue_head(&cfspi->wait);
+
+	/* Create work thread. */
+	cfspi->wq = create_singlethread_workqueue(dev->name);
+	if (!cfspi->wq) {
+		printk(KERN_WARNING "CFSPI: failed to create work queue.\n");
+		res = -ENODEV;
+		goto err_create_wq;
+	}
+
+	/* Initialize work queue. */
+	init_completion(&cfspi->comp);
+
+	/* Create debugfs entries. */
+	dev_debugfs_add(cfspi);
+
+	/* Set up the ifc. */
+	cfspi->ifc.ss_cb = cfspi_ss_cb;
+	cfspi->ifc.xfer_done_cb = cfspi_xfer_done_cb;
+	cfspi->ifc.priv = cfspi;
+
+	/* Add CAIF SPI device to list. */
+	spin_lock(&cfspi_list_lock);
+	list_add_tail(&cfspi->list, &cfspi_list);
+	spin_unlock(&cfspi_list_lock);
+
+	/* Schedule the work queue. */
+	queue_work(cfspi->wq, &cfspi->work);
+
+	/* Register network device. */
+	res = register_netdev(ndev);
+	if (res) {
+		printk(KERN_ERR "CFSPI: Reg. error: %d.\n", res);
+		goto err_net_reg;
+	}
+	return res;
+
+ err_net_reg:
+	dev_debugfs_rem(cfspi);
+	set_bit(SPI_TERMINATE, &cfspi->state);
+	wake_up_interruptible(&cfspi->wait);
+	destroy_workqueue(cfspi->wq);
+ err_create_wq:
+	dma_free(cfspi->xfer.va_rx, cfspi->xfer.pa_rx);
+ err_dma_alloc_rx:
+	dma_free(cfspi->xfer.va_tx, cfspi->xfer.pa_tx);
+ err_dma_alloc_tx:
+	free_netdev(ndev);
+
+	return res;
+}
+
+int cfspi_spi_remove(struct platform_device *pdev)
+{
+	struct list_head *list_node;
+	struct list_head *n;
+	struct cfspi *cfspi = NULL;
+	struct cfspi_dev *dev;
+
+	dev = (struct cfspi_dev *)pdev->dev.platform_data;
+	spin_lock(&cfspi_list_lock);
+	list_for_each_safe(list_node, n, &cfspi_list) {
+		cfspi = list_entry(list_node, struct cfspi, list);
+		/* Find the corresponding device. */
+		if (cfspi->dev == dev) {
+			/* Remove from list. */
+			list_del(list_node);
+			/* Free DMA buffers. */
+			dma_free(cfspi->xfer.va_rx, cfspi->xfer.pa_rx);
+			dma_free(cfspi->xfer.va_tx, cfspi->xfer.pa_tx);
+			set_bit(SPI_TERMINATE, &cfspi->state);
+			wake_up_interruptible(&cfspi->wait);
+			destroy_workqueue(cfspi->wq);
+			/* Destroy debugfs directory and files. */
+			dev_debugfs_rem(cfspi);
+			unregister_netdev(cfspi->ndev);
+			spin_unlock(&cfspi_list_lock);
+			return 0;
+		}
+	}
+	spin_unlock(&cfspi_list_lock);
+	return -ENODEV;
+}
+
+static void __exit cfspi_exit_module(void)
+{
+	struct list_head *list_node;
+	struct list_head *n;
+	struct cfspi *cfspi = NULL;
+
+	list_for_each_safe(list_node, n, &cfspi_list) {
+		cfspi = list_entry(list_node, struct cfspi, list);
+		platform_device_unregister(cfspi->pdev);
+	}
+
+	/* Destroy sysfs files. */
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_up_head_align);
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_up_tail_align);
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_down_head_align);
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_down_tail_align);
+	driver_remove_file(&cfspi_spi_driver.driver, &driver_attr_frame_align);
+	/* Unregister platform driver. */
+	platform_driver_unregister(&cfspi_spi_driver);
+	/* Destroy debugfs root directory. */
+	driver_debugfs_remove();
+}
+
+static int __init cfspi_init_module(void)
+{
+	int result;
+
+	/* Initialize spin lock. */
+	spin_lock_init(&cfspi_list_lock);
+
+	/* Register platform driver. */
+	result = platform_driver_register(&cfspi_spi_driver);
+	if (result) {
+		printk(KERN_ERR "Could not register platform SPI driver.\n");
+		goto err_dev_register;
+	}
+
+	/* Create sysfs files. */
+	result =
+	    driver_create_file(&cfspi_spi_driver.driver,
+			       &driver_attr_up_head_align);
+	if (result) {
+		printk(KERN_ERR "Sysfs creation failed 1.\n");
+		goto err_create_up_head_align;
+	}
+
+	result =
+	    driver_create_file(&cfspi_spi_driver.driver,
+			       &driver_attr_up_tail_align);
+	if (result) {
+		printk(KERN_ERR "Sysfs creation failed 2.\n");
+		goto err_create_up_tail_align;
+	}
+
+	result =
+	    driver_create_file(&cfspi_spi_driver.driver,
+			       &driver_attr_down_head_align);
+	if (result) {
+		printk(KERN_ERR "Sysfs creation failed 3.\n");
+		goto err_create_down_head_align;
+	}
+
+	result =
+	    driver_create_file(&cfspi_spi_driver.driver,
+			       &driver_attr_down_tail_align);
+	if (result) {
+		printk(KERN_ERR "Sysfs creation failed 4.\n");
+		goto err_create_down_tail_align;
+	}
+
+	result =
+	    driver_create_file(&cfspi_spi_driver.driver,
+			       &driver_attr_frame_align);
+	if (result) {
+		printk(KERN_ERR "Sysfs creation failed 5.\n");
+		goto err_create_frame_align;
+	}
+	driver_debugfs_create();
+	return result;
+
+ err_create_frame_align:
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_down_tail_align);
+ err_create_down_tail_align:
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_down_head_align);
+ err_create_down_head_align:
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_up_tail_align);
+ err_create_up_tail_align:
+	driver_remove_file(&cfspi_spi_driver.driver,
+			   &driver_attr_up_head_align);
+ err_create_up_head_align:
+ err_dev_register:
+	return result;
+}
+
+module_init(cfspi_init_module);
+module_exit(cfspi_exit_module);
diff --git a/drivers/net/caif/caif_spi_slave.c b/drivers/net/caif/caif_spi_slave.c
new file mode 100644
index 000000000000..077ccf840edf
--- /dev/null
+++ b/drivers/net/caif/caif_spi_slave.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com
+ * Author:  Daniel Martensson / Daniel.Martensson@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2.
+ */
+#include <linux/version.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/debugfs.h>
+#include <net/caif/caif_spi.h>
+
+#ifndef CONFIG_CAIF_SPI_SYNC
+#define SPI_DATA_POS SPI_CMD_SZ
+static inline int forward_to_spi_cmd(struct cfspi *cfspi)
+{
+	return cfspi->rx_cpck_len;
+}
+#else
+#define SPI_DATA_POS 0
+static inline int forward_to_spi_cmd(struct cfspi *cfspi)
+{
+	return 0;
+}
+#endif
+
+int spi_frm_align = 2;
+int spi_up_head_align = 1;
+int spi_up_tail_align;
+int spi_down_head_align = 3;
+int spi_down_tail_align = 1;
+
+#ifdef CONFIG_DEBUG_FS
+static inline void debugfs_store_prev(struct cfspi *cfspi)
+{
+	/* Store previous command for debugging reasons.*/
+	cfspi->pcmd = cfspi->cmd;
+	/* Store previous transfer. */
+	cfspi->tx_ppck_len = cfspi->tx_cpck_len;
+	cfspi->rx_ppck_len = cfspi->rx_cpck_len;
+}
+#else
+static inline void debugfs_store_prev(struct cfspi *cfspi)
+{
+}
+#endif
+
+void cfspi_xfer(struct work_struct *work)
+{
+	struct cfspi *cfspi;
+	u8 *ptr = NULL;
+	unsigned long flags;
+	int ret;
+	cfspi = container_of(work, struct cfspi, work);
+
+	/* Initialize state. */
+	cfspi->cmd = SPI_CMD_EOT;
+
+	for (;;) {
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_WAITING);
+
+		/* Wait for master talk or transmit event. */
+		wait_event_interruptible(cfspi->wait,
+				 test_bit(SPI_XFER, &cfspi->state) ||
+				 test_bit(SPI_TERMINATE, &cfspi->state));
+
+		if (test_bit(SPI_TERMINATE, &cfspi->state))
+			return;
+
+#if CFSPI_DBG_PREFILL
+		/* Prefill buffers for easier debugging. */
+		memset(cfspi->xfer.va_tx, 0xFF, SPI_DMA_BUF_LEN);
+		memset(cfspi->xfer.va_rx, 0xFF, SPI_DMA_BUF_LEN);
+#endif	/* CFSPI_DBG_PREFILL */
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_AWAKE);
+
+	/* Check whether we have a committed frame. */
+		if (cfspi->tx_cpck_len) {
+			int len;
+
+			cfspi_dbg_state(cfspi, CFSPI_STATE_FETCH_PKT);
+
+			/* Copy commited SPI frames after the SPI indication. */
+			ptr = (u8 *) cfspi->xfer.va_tx;
+			ptr += SPI_IND_SZ;
+			len = cfspi_xmitfrm(cfspi, ptr, cfspi->tx_cpck_len);
+			WARN_ON(len != cfspi->tx_cpck_len);
+	}
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_GET_NEXT);
+
+		/* Get length of next frame to commit. */
+		cfspi->tx_npck_len = cfspi_xmitlen(cfspi);
+
+		WARN_ON(cfspi->tx_npck_len > SPI_DMA_BUF_LEN);
+
+		/*
+		 * Add indication and length at the beginning of the frame,
+		 * using little endian.
+		 */
+		ptr = (u8 *) cfspi->xfer.va_tx;
+		*ptr++ = SPI_CMD_IND;
+		*ptr++ = (SPI_CMD_IND  & 0xFF00) >> 8;
+		*ptr++ = cfspi->tx_npck_len & 0x00FF;
+		*ptr++ = (cfspi->tx_npck_len & 0xFF00) >> 8;
+
+		/* Calculate length of DMAs. */
+		cfspi->xfer.tx_dma_len = cfspi->tx_cpck_len + SPI_IND_SZ;
+		cfspi->xfer.rx_dma_len = cfspi->rx_cpck_len + SPI_CMD_SZ;
+
+		/* Add SPI TX frame alignment padding, if necessary. */
+		if (cfspi->tx_cpck_len &&
+			(cfspi->xfer.tx_dma_len % spi_frm_align)) {
+
+			cfspi->xfer.tx_dma_len += spi_frm_align -
+			    (cfspi->xfer.tx_dma_len % spi_frm_align);
+		}
+
+		/* Add SPI RX frame alignment padding, if necessary. */
+		if (cfspi->rx_cpck_len &&
+			(cfspi->xfer.rx_dma_len % spi_frm_align)) {
+
+			cfspi->xfer.rx_dma_len += spi_frm_align -
+			    (cfspi->xfer.rx_dma_len % spi_frm_align);
+		}
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_INIT_XFER);
+
+		/* Start transfer. */
+		ret = cfspi->dev->init_xfer(&cfspi->xfer, cfspi->dev);
+		WARN_ON(ret);
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_WAIT_ACTIVE);
+
+		/*
+		 * TODO: We might be able to make an assumption if this is the
+		 * first loop. Make sure that minimum toggle time is respected.
+		 */
+		udelay(MIN_TRANSITION_TIME_USEC);
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_SIG_ACTIVE);
+
+		/* Signal that we are ready to recieve data. */
+		cfspi->dev->sig_xfer(true, cfspi->dev);
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_WAIT_XFER_DONE);
+
+		/* Wait for transfer completion. */
+		wait_for_completion(&cfspi->comp);
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_XFER_DONE);
+
+		if (cfspi->cmd == SPI_CMD_EOT) {
+			/*
+			 * Clear the master talk bit. A xfer is always at
+			 *  least two bursts.
+			 */
+			clear_bit(SPI_SS_ON, &cfspi->state);
+		}
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_WAIT_INACTIVE);
+
+		/* Make sure that the minimum toggle time is respected. */
+		if (SPI_XFER_TIME_USEC(cfspi->xfer.tx_dma_len,
+					cfspi->dev->clk_mhz) <
+			MIN_TRANSITION_TIME_USEC) {
+
+			udelay(MIN_TRANSITION_TIME_USEC -
+				SPI_XFER_TIME_USEC
+				(cfspi->xfer.tx_dma_len, cfspi->dev->clk_mhz));
+		}
+
+		cfspi_dbg_state(cfspi, CFSPI_STATE_SIG_INACTIVE);
+
+		/* De-assert transfer signal. */
+		cfspi->dev->sig_xfer(false, cfspi->dev);
+
+		/* Check whether we received a CAIF packet. */
+		if (cfspi->rx_cpck_len) {
+			int len;
+
+			cfspi_dbg_state(cfspi, CFSPI_STATE_DELIVER_PKT);
+
+			/* Parse SPI frame. */
+			ptr = ((u8 *)(cfspi->xfer.va_rx + SPI_DATA_POS));
+
+			len = cfspi_rxfrm(cfspi, ptr, cfspi->rx_cpck_len);
+			WARN_ON(len != cfspi->rx_cpck_len);
+		}
+
+		/* Check the next SPI command and length. */
+		ptr = (u8 *) cfspi->xfer.va_rx;
+
+		ptr += forward_to_spi_cmd(cfspi);
+
+		cfspi->cmd = *ptr++;
+		cfspi->cmd |= ((*ptr++) << 8) & 0xFF00;
+		cfspi->rx_npck_len = *ptr++;
+		cfspi->rx_npck_len |= ((*ptr++) << 8) & 0xFF00;
+
+		WARN_ON(cfspi->rx_npck_len > SPI_DMA_BUF_LEN);
+		WARN_ON(cfspi->cmd > SPI_CMD_EOT);
+
+		debugfs_store_prev(cfspi);
+
+		/* Check whether the master issued an EOT command. */
+		if (cfspi->cmd == SPI_CMD_EOT) {
+			/* Reset state. */
+			cfspi->tx_cpck_len = 0;
+			cfspi->rx_cpck_len = 0;
+		} else {
+			/* Update state. */
+			cfspi->tx_cpck_len = cfspi->tx_npck_len;
+			cfspi->rx_cpck_len = cfspi->rx_npck_len;
+		}
+
+		/*
+		 * Check whether we need to clear the xfer bit.
+		 * Spin lock needed for packet insertion.
+		 * Test and clear of different bits
+		 * are not supported.
+		 */
+		spin_lock_irqsave(&cfspi->lock, flags);
+		if (cfspi->cmd == SPI_CMD_EOT && !cfspi_xmitlen(cfspi)
+			&& !test_bit(SPI_SS_ON, &cfspi->state))
+			clear_bit(SPI_XFER, &cfspi->state);
+
+		spin_unlock_irqrestore(&cfspi->lock, flags);
+	}
+}
+
+struct platform_driver cfspi_spi_driver = {
+	.probe = cfspi_spi_probe,
+	.remove = cfspi_spi_remove,
+	.driver = {
+		   .name = "cfspi_sspi",
+		   .owner = THIS_MODULE,
+		   },
+};
diff --git a/include/net/caif/caif_spi.h b/include/net/caif/caif_spi.h
new file mode 100644
index 000000000000..ce4570dff020
--- /dev/null
+++ b/include/net/caif/caif_spi.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Daniel Martensson / Daniel.Martensson@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#ifndef CAIF_SPI_H_
+#define CAIF_SPI_H_
+
+#include <net/caif/caif_device.h>
+
+#define SPI_CMD_WR			0x00
+#define SPI_CMD_RD			0x01
+#define SPI_CMD_EOT			0x02
+#define SPI_CMD_IND			0x04
+
+#define SPI_DMA_BUF_LEN			8192
+
+#define WL_SZ				2	/* 16 bits. */
+#define SPI_CMD_SZ			4	/* 32 bits. */
+#define SPI_IND_SZ			4	/* 32 bits. */
+
+#define SPI_XFER			0
+#define SPI_SS_ON			1
+#define SPI_SS_OFF			2
+#define SPI_TERMINATE			3
+
+/* Minimum time between different levels is 50 microseconds. */
+#define MIN_TRANSITION_TIME_USEC	50
+
+/* Defines for calculating duration of SPI transfers for a particular
+ * number of bytes.
+ */
+#define SPI_MASTER_CLK_MHZ		13
+#define SPI_XFER_TIME_USEC(bytes, clk) (((bytes) * 8) / clk)
+
+/* Normally this should be aligned on the modem in order to benefit from full
+ * duplex transfers. However a size of 8188 provokes errors when running with
+ * the modem. These errors occur when packet sizes approaches 4 kB of data.
+ */
+#define CAIF_MAX_SPI_FRAME 4092
+
+/* Maximum number of uplink CAIF frames that can reside in the same SPI frame.
+ * This number should correspond with the modem setting. The application side
+ * CAIF accepts any number of embedded downlink CAIF frames.
+ */
+#define CAIF_MAX_SPI_PKTS 9
+
+/* Decides if SPI buffers should be prefilled with 0xFF pattern for easier
+ * debugging. Both TX and RX buffers will be filled before the transfer.
+ */
+#define CFSPI_DBG_PREFILL		0
+
+/* Structure describing a SPI transfer. */
+struct cfspi_xfer {
+	u16 tx_dma_len;
+	u16 rx_dma_len;
+	void *va_tx;
+	dma_addr_t pa_tx;
+	void *va_rx;
+	dma_addr_t pa_rx;
+};
+
+/* Structure implemented by the SPI interface. */
+struct cfspi_ifc {
+	void (*ss_cb) (bool assert, struct cfspi_ifc *ifc);
+	void (*xfer_done_cb) (struct cfspi_ifc *ifc);
+	void *priv;
+};
+
+/* Structure implemented by SPI clients. */
+struct cfspi_dev {
+	int (*init_xfer) (struct cfspi_xfer *xfer, struct cfspi_dev *dev);
+	void (*sig_xfer) (bool xfer, struct cfspi_dev *dev);
+	struct cfspi_ifc *ifc;
+	char *name;
+	u32 clk_mhz;
+	void *priv;
+};
+
+/* Enumeration describing the CAIF SPI state. */
+enum cfspi_state {
+	CFSPI_STATE_WAITING = 0,
+	CFSPI_STATE_AWAKE,
+	CFSPI_STATE_FETCH_PKT,
+	CFSPI_STATE_GET_NEXT,
+	CFSPI_STATE_INIT_XFER,
+	CFSPI_STATE_WAIT_ACTIVE,
+	CFSPI_STATE_SIG_ACTIVE,
+	CFSPI_STATE_WAIT_XFER_DONE,
+	CFSPI_STATE_XFER_DONE,
+	CFSPI_STATE_WAIT_INACTIVE,
+	CFSPI_STATE_SIG_INACTIVE,
+	CFSPI_STATE_DELIVER_PKT,
+	CFSPI_STATE_MAX,
+};
+
+/* Structure implemented by SPI physical interfaces. */
+struct cfspi {
+	struct caif_dev_common cfdev;
+	struct net_device *ndev;
+	struct platform_device *pdev;
+	struct sk_buff_head qhead;
+	struct sk_buff_head chead;
+	u16 cmd;
+	u16 tx_cpck_len;
+	u16 tx_npck_len;
+	u16 rx_cpck_len;
+	u16 rx_npck_len;
+	struct cfspi_ifc ifc;
+	struct cfspi_xfer xfer;
+	struct cfspi_dev *dev;
+	unsigned long state;
+	struct work_struct work;
+	struct workqueue_struct *wq;
+	struct list_head list;
+	int    flow_off_sent;
+	u32 qd_low_mark;
+	u32 qd_high_mark;
+	struct completion comp;
+	wait_queue_head_t wait;
+	spinlock_t lock;
+	bool flow_stop;
+#ifdef CONFIG_DEBUG_FS
+	enum cfspi_state dbg_state;
+	u16 pcmd;
+	u16 tx_ppck_len;
+	u16 rx_ppck_len;
+	struct dentry *dbgfs_dir;
+	struct dentry *dbgfs_state;
+	struct dentry *dbgfs_frame;
+#endif				/* CONFIG_DEBUG_FS */
+};
+
+extern int spi_frm_align;
+extern int spi_up_head_align;
+extern int spi_up_tail_align;
+extern int spi_down_head_align;
+extern int spi_down_tail_align;
+extern struct platform_driver cfspi_spi_driver;
+
+void cfspi_dbg_state(struct cfspi *cfspi, int state);
+int cfspi_xmitfrm(struct cfspi *cfspi, u8 *buf, size_t len);
+int cfspi_xmitlen(struct cfspi *cfspi);
+int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len);
+int cfspi_spi_remove(struct platform_device *pdev);
+int cfspi_spi_probe(struct platform_device *pdev);
+int cfspi_xmitfrm(struct cfspi *cfspi, u8 *buf, size_t len);
+int cfspi_xmitlen(struct cfspi *cfspi);
+int cfspi_rxfrm(struct cfspi *cfspi, u8 *buf, size_t len);
+void cfspi_xfer(struct work_struct *work);
+
+#endif				/* CAIF_SPI_H_ */
-- 
cgit v1.2.3-55-g7522


From 4efd7e833591721bec21cc4730a7f6261417840f Mon Sep 17 00:00:00 2001
From: Andreas Steffen
Date: Wed, 30 Jun 2010 10:41:15 -0700
Subject: xfrm: fix XFRMA_MARK extraction in xfrm_mark_get

Determine the size of the xfrm_mark struct, not of its pointer.

Signed-off-by: Andreas Steffen <andreas.steffen@strongswan.org>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1913af67c43d..fc8f36dd0f5c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1586,7 +1586,7 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
 static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m)
 {
 	if (attrs[XFRMA_MARK])
-		memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(m));
+		memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(struct xfrm_mark));
 	else
 		m->v = m->m = 0;
 
-- 
cgit v1.2.3-55-g7522


From 787a34456dc34bdd75b29cebb53cb09c727674d6 Mon Sep 17 00:00:00 2001
From: Kulikov Vasiliy
Date: Wed, 30 Jun 2010 06:08:15 +0000
Subject: net/neighbour.h: fix typo

'Shoul' must be 'should'.

Signed-off-by: Kulikov Vasiliy <segooon@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index eb21340a573b..242879b6c4df 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -151,7 +151,7 @@ struct neigh_table {
 	void			(*proxy_redo)(struct sk_buff *skb);
 	char			*id;
 	struct neigh_parms	parms;
-	/* HACK. gc_* shoul follow parms without a gap! */
+	/* HACK. gc_* should follow parms without a gap! */
 	int			gc_interval;
 	int			gc_thresh1;
 	int			gc_thresh2;
-- 
cgit v1.2.3-55-g7522


From 4ce3c183fcade7f4b30a33dae90cd774c3d9e094 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 30 Jun 2010 13:31:19 -0700
Subject: snmp: 64bit ipstats_mib for all arches

/proc/net/snmp and /proc/net/netstat expose SNMP counters.

Width of these counters is either 32 or 64 bits, depending on the size
of "unsigned long" in kernel.

This means user program parsing these files must already be prepared to
deal with 64bit values, regardless of user program being 32 or 64 bit.

This patch introduces 64bit snmp values for IPSTAT mib, where some
counters can wrap pretty fast if they are 32bit wide.

# netstat -s|egrep "InOctets|OutOctets"
    InOctets: 244068329096
    OutOctets: 244069348848

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h    | 20 +++++++++-----
 include/net/ipv6.h  | 12 ++++-----
 include/net/snmp.h  | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 net/ipv4/af_inet.c  | 36 +++++++++++++++++++++++++
 net/ipv4/proc.c     | 15 ++++++-----
 net/ipv6/addrconf.c | 18 ++++++++++++-
 net/ipv6/proc.c     | 17 +++++++++---
 7 files changed, 167 insertions(+), 26 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 3b524df7dddb..890f9725d681 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -165,12 +165,12 @@ struct ipv4_config {
 };
 
 extern struct ipv4_config ipv4_config;
-#define IP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.ip_statistics, field)
-#define IP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.ip_statistics, field)
-#define IP_ADD_STATS(net, field, val)	SNMP_ADD_STATS((net)->mib.ip_statistics, field, val)
-#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS_BH((net)->mib.ip_statistics, field, val)
-#define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS((net)->mib.ip_statistics, field, val)
-#define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS_BH((net)->mib.ip_statistics, field, val)
+#define IP_INC_STATS(net, field)	SNMP_INC_STATS64((net)->mib.ip_statistics, field)
+#define IP_INC_STATS_BH(net, field)	SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field)
+#define IP_ADD_STATS(net, field, val)	SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
+#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
+#define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
+#define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.net_statistics, field)
 #define NET_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
 #define NET_INC_STATS_USER(net, field) 	SNMP_INC_STATS_USER((net)->mib.net_statistics, field)
@@ -178,6 +178,14 @@ extern struct ipv4_config ipv4_config;
 #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
 extern unsigned long snmp_fold_field(void __percpu *mib[], int offt);
+#if BITS_PER_LONG==32
+extern u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t sync_off);
+#else
+static inline u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_off)
+{
+	return snmp_fold_field(mib, offt);
+}
+#endif
 extern int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align);
 extern void snmp_mib_free(void __percpu *ptr[2]);
 
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index f5808d596aab..1f8412410998 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -136,17 +136,17 @@ extern struct ctl_path net_ipv6_ctl_path[];
 /* MIBs */
 
 #define IP6_INC_STATS(net, idev,field)		\
-		_DEVINC(net, ipv6, , idev, field)
+		_DEVINC(net, ipv6, 64, idev, field)
 #define IP6_INC_STATS_BH(net, idev,field)	\
-		_DEVINC(net, ipv6, _BH, idev, field)
+		_DEVINC(net, ipv6, 64_BH, idev, field)
 #define IP6_ADD_STATS(net, idev,field,val)	\
-		_DEVADD(net, ipv6, , idev, field, val)
+		_DEVADD(net, ipv6, 64, idev, field, val)
 #define IP6_ADD_STATS_BH(net, idev,field,val)	\
-		_DEVADD(net, ipv6, _BH, idev, field, val)
+		_DEVADD(net, ipv6, 64_BH, idev, field, val)
 #define IP6_UPD_PO_STATS(net, idev,field,val)   \
-		_DEVUPD(net, ipv6, , idev, field, val)
+		_DEVUPD(net, ipv6, 64, idev, field, val)
 #define IP6_UPD_PO_STATS_BH(net, idev,field,val)   \
-		_DEVUPD(net, ipv6, _BH, idev, field, val)
+		_DEVUPD(net, ipv6, 64_BH, idev, field, val)
 #define ICMP6_INC_STATS(net, idev, field)	\
 		_DEVINC(net, icmpv6, , idev, field)
 #define ICMP6_INC_STATS_BH(net, idev, field)	\
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 899003d18db9..a0e61806d480 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -47,15 +47,16 @@ struct snmp_mib {
 }
 
 /*
- * We use all unsigned longs. Linux will soon be so reliable that even 
- * these will rapidly get too small 8-). Seriously consider the IpInReceives 
- * count on the 20Gb/s + networks people expect in a few years time!
+ * We use unsigned longs for most mibs but u64 for ipstats.
  */
+#include <linux/u64_stats_sync.h>
 
 /* IPstats */
 #define IPSTATS_MIB_MAX	__IPSTATS_MIB_MAX
 struct ipstats_mib {
-	unsigned long	mibs[IPSTATS_MIB_MAX];
+	/* mibs[] must be first field of struct ipstats_mib */
+	u64		mibs[IPSTATS_MIB_MAX];
+	struct u64_stats_sync syncp;
 };
 
 /* ICMP */
@@ -155,4 +156,70 @@ struct linux_xfrm_mib {
 		ptr->mibs[basefield##PKTS]++; \
 		ptr->mibs[basefield##OCTETS] += addend;\
 	} while (0)
+
+
+#if BITS_PER_LONG==32
+
+#define SNMP_ADD_STATS64_BH(mib, field, addend) 			\
+	do {								\
+		__typeof__(*mib[0]) *ptr = __this_cpu_ptr((mib)[0]);	\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[field] += addend;				\
+		u64_stats_update_end(&ptr->syncp);			\
+	} while (0)
+#define SNMP_ADD_STATS64_USER(mib, field, addend) 			\
+	do {								\
+		__typeof__(*mib[0]) *ptr;				\
+		preempt_disable();					\
+		ptr = __this_cpu_ptr((mib)[1]);				\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[field] += addend;				\
+		u64_stats_update_end(&ptr->syncp);			\
+		preempt_enable();					\
+	} while (0)
+#define SNMP_ADD_STATS64(mib, field, addend)				\
+	do {								\
+		__typeof__(*mib[0]) *ptr;				\
+		preempt_disable();					\
+		ptr = __this_cpu_ptr((mib)[!in_softirq()]);		\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[field] += addend;				\
+		u64_stats_update_end(&ptr->syncp);			\
+		preempt_enable();					\
+	} while (0)
+#define SNMP_INC_STATS64_BH(mib, field) SNMP_ADD_STATS64_BH(mib, field, 1)
+#define SNMP_INC_STATS64_USER(mib, field) SNMP_ADD_STATS64_USER(mib, field, 1)
+#define SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1)
+#define SNMP_UPD_PO_STATS64(mib, basefield, addend)			\
+	do {								\
+		__typeof__(*mib[0]) *ptr;				\
+		preempt_disable();					\
+		ptr = __this_cpu_ptr((mib)[!in_softirq()]);		\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[basefield##PKTS]++;				\
+		ptr->mibs[basefield##OCTETS] += addend;			\
+		u64_stats_update_end(&ptr->syncp);			\
+		preempt_enable();					\
+	} while (0)
+#define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend)			\
+	do {								\
+		__typeof__(*mib[0]) *ptr;				\
+		ptr = __this_cpu_ptr((mib)[!in_softirq()]);		\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[basefield##PKTS]++;				\
+		ptr->mibs[basefield##OCTETS] += addend;			\
+		u64_stats_update_end(&ptr->syncp);			\
+	} while (0)
+#else
+#define SNMP_INC_STATS64_BH(mib, field)		SNMP_INC_STATS_BH(mib, field)
+#define SNMP_INC_STATS64_USER(mib, field)	SNMP_INC_STATS_USER(mib, field)
+#define SNMP_INC_STATS64(mib, field)		SNMP_INC_STATS(mib, field)
+#define SNMP_DEC_STATS64(mib, field)		SNMP_DEC_STATS(mib, field)
+#define SNMP_ADD_STATS64_BH(mib, field, addend) SNMP_ADD_STATS_BH(mib, field, addend)
+#define SNMP_ADD_STATS64_USER(mib, field, addend) SNMP_ADD_STATS_USER(mib, field, addend)
+#define SNMP_ADD_STATS64(mib, field, addend)	SNMP_ADD_STATS(mib, field, addend)
+#define SNMP_UPD_PO_STATS64(mib, basefield, addend) SNMP_UPD_PO_STATS(mib, basefield, addend)
+#define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend) SNMP_UPD_PO_STATS_BH(mib, basefield, addend)
+#endif
+
 #endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 640db9b90330..3ceb025b16f2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1427,6 +1427,42 @@ unsigned long snmp_fold_field(void __percpu *mib[], int offt)
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
+#if BITS_PER_LONG==32
+
+u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
+{
+	u64 res = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		void *bhptr, *userptr;
+		struct u64_stats_sync *syncp;
+		u64 v_bh, v_user;
+		unsigned int start;
+
+		/* first mib used by softirq context, we must use _bh() accessors */
+		bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
+		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
+		do {
+			start = u64_stats_fetch_begin_bh(syncp);
+			v_bh = *(((u64 *) bhptr) + offt);
+		} while (u64_stats_fetch_retry_bh(syncp, start));
+
+		/* second mib used in USER context */
+		userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
+		syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
+		do {
+			start = u64_stats_fetch_begin(syncp);
+			v_user = *(((u64 *) userptr) + offt);
+		} while (u64_stats_fetch_retry(syncp, start));
+
+		res += v_bh + v_user;
+	}
+	return res;
+}
+EXPORT_SYMBOL_GPL(snmp_fold_field64);
+#endif
+
 int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
 {
 	BUG_ON(ptr == NULL);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e320ca6b3ef3..4ae1f203f7cb 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -343,10 +343,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		   IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
 		   sysctl_ip_default_ttl);
 
+	BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
-		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.ip_statistics,
-					   snmp4_ipstats_list[i].entry));
+		seq_printf(seq, " %llu",
+			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+					     snmp4_ipstats_list[i].entry,
+					     offsetof(struct ipstats_mib, syncp)));
 
 	icmp_put(seq);	/* RFC 2011 compatibility */
 	icmpmsg_put(seq);
@@ -432,9 +434,10 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 
 	seq_puts(seq, "\nIpExt:");
 	for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
-		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.ip_statistics,
-					   snmp4_ipextstats_list[i].entry));
+		seq_printf(seq, " %llu",
+			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+					     snmp4_ipextstats_list[i].entry,
+					     offsetof(struct ipstats_mib, syncp)));
 
 	seq_putc(seq, '\n');
 	return 0;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2514adf5251e..e81155d2f251 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3862,12 +3862,28 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
 	memset(&stats[items], 0, pad);
 }
 
+static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib,
+				      int items, int bytes, size_t syncpoff)
+{
+	int i;
+	int pad = bytes - sizeof(u64) * items;
+	BUG_ON(pad < 0);
+
+	/* Use put_unaligned() because stats may not be aligned for u64. */
+	put_unaligned(items, &stats[0]);
+	for (i = 1; i < items; i++)
+		put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]);
+
+	memset(&stats[items], 0, pad);
+}
+
 static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
 			     int bytes)
 {
 	switch (attrtype) {
 	case IFLA_INET6_STATS:
-		__snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
+		__snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6,
+				     IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
 		break;
 	case IFLA_INET6_ICMP6STATS:
 		__snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 566798d69f37..d082eaeefa25 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -174,17 +174,28 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib,
 				const struct snmp_mib *itemlist)
 {
 	int i;
-	for (i=0; itemlist[i].name; i++)
+
+	for (i = 0; itemlist[i].name; i++)
 		seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
 			   snmp_fold_field(mib, itemlist[i].entry));
 }
 
+static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
+				  const struct snmp_mib *itemlist, size_t syncpoff)
+{
+	int i;
+
+	for (i = 0; itemlist[i].name; i++)
+		seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name,
+			   snmp_fold_field64(mib, itemlist[i].entry, syncpoff));
+}
+
 static int snmp6_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = (struct net *)seq->private;
 
-	snmp6_seq_show_item(seq, (void __percpu **)net->mib.ipv6_statistics,
-			    snmp6_ipstats_list);
+	snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
+			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
 	snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
 			    snmp6_icmp6_list);
 	snmp6_seq_show_icmpv6msg(seq,
-- 
cgit v1.2.3-55-g7522


From d6bebca92c663fb216c072193945946f3807ca7f Mon Sep 17 00:00:00 2001
From: Changli Gao
Date: Tue, 29 Jun 2010 04:39:37 +0000
Subject: fragment: add fast path for in-order fragments

add fast path for in-order fragments

As the fragments are sent in order in most of OSes, such as Windows, Darwin and
FreeBSD, it is likely the new fragments are at the end of the inet_frag_queue.
In the fast path, we check if the skb at the end of the inet_frag_queue is the
prev we expect.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/inet_frag.h |    1 +
 net/ipv4/ip_fragment.c  |   12 ++++++++++++
 net/ipv6/reassembly.c   |   11 +++++++++++
 3 files changed, 24 insertions(+)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h |  1 +
 net/ipv4/ip_fragment.c  | 12 ++++++++++++
 net/ipv6/reassembly.c   | 11 +++++++++++
 3 files changed, 24 insertions(+)

(limited to 'include/net')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 39f2dc943908..16ff29a7bb30 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -20,6 +20,7 @@ struct inet_frag_queue {
 	atomic_t		refcnt;
 	struct timer_list	timer;      /* when will this queue expire? */
 	struct sk_buff		*fragments; /* list of received fragments */
+	struct sk_buff		*fragments_tail;
 	ktime_t			stamp;
 	int			len;        /* total length of orig datagram */
 	int			meat;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 858d34648eee..dd0dbf0c6b7f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -314,6 +314,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	qp->q.len = 0;
 	qp->q.meat = 0;
 	qp->q.fragments = NULL;
+	qp->q.fragments_tail = NULL;
 	qp->iif = 0;
 
 	return 0;
@@ -386,6 +387,11 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	 * in the chain of fragments so far.  We must know where to put
 	 * this fragment, right?
 	 */
+	prev = qp->q.fragments_tail;
+	if (!prev || FRAG_CB(prev)->offset < offset) {
+		next = NULL;
+		goto found;
+	}
 	prev = NULL;
 	for (next = qp->q.fragments; next != NULL; next = next->next) {
 		if (FRAG_CB(next)->offset >= offset)
@@ -393,6 +399,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		prev = next;
 	}
 
+found:
 	/* We found where to put this one.  Check for overlap with
 	 * preceding fragment, and, if needed, align things so that
 	 * any overlaps are eliminated.
@@ -451,6 +458,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
+	if (!next)
+		qp->q.fragments_tail = skb;
 	if (prev)
 		prev->next = skb;
 	else
@@ -504,6 +513,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 			goto out_nomem;
 
 		fp->next = head->next;
+		if (!fp->next)
+			qp->q.fragments_tail = fp;
 		prev->next = fp;
 
 		skb_morph(head, qp->q.fragments);
@@ -574,6 +585,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	iph->tot_len = htons(len);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
+	qp->q.fragments_tail = NULL;
 	return 0;
 
 out_nomem:
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0b97230a3251..545c4141b755 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -333,6 +333,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	 * in the chain of fragments so far.  We must know where to put
 	 * this fragment, right?
 	 */
+	prev = fq->q.fragments_tail;
+	if (!prev || FRAG6_CB(prev)->offset < offset) {
+		next = NULL;
+		goto found;
+	}
 	prev = NULL;
 	for(next = fq->q.fragments; next != NULL; next = next->next) {
 		if (FRAG6_CB(next)->offset >= offset)
@@ -340,6 +345,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 		prev = next;
 	}
 
+found:
 	/* We found where to put this one.  Check for overlap with
 	 * preceding fragment, and, if needed, align things so that
 	 * any overlaps are eliminated.
@@ -397,6 +403,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
+	if (!next)
+		fq->q.fragments_tail = skb;
 	if (prev)
 		prev->next = skb;
 	else
@@ -463,6 +471,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 			goto out_oom;
 
 		fp->next = head->next;
+		if (!fp->next)
+			fq->q.fragments_tail = fp;
 		prev->next = fp;
 
 		skb_morph(head, fq->q.fragments);
@@ -549,6 +559,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
 	rcu_read_unlock();
 	fq->q.fragments = NULL;
+	fq->q.fragments_tail = NULL;
 	return 1;
 
 out_oversize:
-- 
cgit v1.2.3-55-g7522


From 4ef6acff83222f4496ceef7d1f0ee9e50a5bb403 Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Thu, 1 Jul 2010 13:21:35 +0000
Subject: sched: qdisc_reset_all_tx is calling qdisc_reset without qdisc_lock

When calling qdisc_reset() the qdisc lock needs to be held.  In
this case there is at least one driver i4l which is using this
without holding the lock.  Add the locking here.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 03ca5d826757..ba749be1e354 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -317,8 +317,16 @@ extern void tcf_destroy_chain(struct tcf_proto **fl);
 static inline void qdisc_reset_all_tx(struct net_device *dev)
 {
 	unsigned int i;
-	for (i = 0; i < dev->num_tx_queues; i++)
-		qdisc_reset(netdev_get_tx_queue(dev, i)->qdisc);
+	struct Qdisc *qdisc;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+		if (qdisc) {
+			spin_lock_bh(qdisc_lock(qdisc));
+			qdisc_reset(qdisc);
+			spin_unlock_bh(qdisc_lock(qdisc));
+		}
+	}
 }
 
 /* Are all TX queues of the device empty?  */
-- 
cgit v1.2.3-55-g7522


From f0796d5c73e59786d09a1e617689d1d415f2db44 Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Thu, 1 Jul 2010 13:21:57 +0000
Subject: net: decreasing real_num_tx_queues needs to flush qdisc

Reducing real_num_queues needs to flush the qdisc otherwise
skbs with queue_mappings greater then real_num_tx_queues can
be sent to the underlying driver.

The flow for this is,

dev_queue_xmit()
	dev_pick_tx()
		skb_tx_hash()  => hash using real_num_tx_queues
		skb_set_queue_mapping()
	...
	qdisc_enqueue_root() => enqueue skb on txq from hash
...
dev->real_num_tx_queues -= n
...
sch_direct_xmit()
	dev_hard_start_xmit()
		ndo_start_xmit(skb,dev) => skb queue set with old hash

skbs are enqueued on the qdisc with skb->queue_mapping set
0 < queue_mappings < real_num_tx_queues.  When the driver
decreases real_num_tx_queues skb's may be dequeued from the
qdisc with a queue_mapping greater then real_num_tx_queues.

This fixes a case in ixgbe where this was occurring with DCB
and FCoE. Because the driver is using queue_mapping to map
skbs to tx descriptor rings we can potentially map skbs to
rings that no longer exist.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_main.c |  2 +-
 include/linux/netdevice.h      |  3 +++
 include/net/sch_generic.h      | 12 ++++++++----
 net/core/dev.c                 | 18 ++++++++++++++++++
 4 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index a0b33165b989..7b5d9764f317 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -4001,7 +4001,7 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
 
 done:
 	/* Notify the stack of the (possibly) reduced Tx Queue count. */
-	adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
+	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
 }
 
 static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 40291f375024..5e6188d9f017 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1656,6 +1656,9 @@ static inline int netif_is_multiqueue(const struct net_device *dev)
 	return (dev->num_tx_queues > 1);
 }
 
+extern void netif_set_real_num_tx_queues(struct net_device *dev,
+					 unsigned int txq);
+
 /* Use this variant when it is known for sure that it
  * is executing from hardware interrupt context or with hardware interrupts
  * disabled.
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ba749be1e354..433604bb3fe8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -313,13 +313,12 @@ extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
 extern void tcf_destroy(struct tcf_proto *tp);
 extern void tcf_destroy_chain(struct tcf_proto **fl);
 
-/* Reset all TX qdiscs of a device.  */
-static inline void qdisc_reset_all_tx(struct net_device *dev)
+/* Reset all TX qdiscs greater then index of a device.  */
+static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 {
-	unsigned int i;
 	struct Qdisc *qdisc;
 
-	for (i = 0; i < dev->num_tx_queues; i++) {
+	for (; i < dev->num_tx_queues; i++) {
 		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
 		if (qdisc) {
 			spin_lock_bh(qdisc_lock(qdisc));
@@ -329,6 +328,11 @@ static inline void qdisc_reset_all_tx(struct net_device *dev)
 	}
 }
 
+static inline void qdisc_reset_all_tx(struct net_device *dev)
+{
+	qdisc_reset_all_tx_gt(dev, 0);
+}
+
 /* Are all TX queues of the device empty?  */
 static inline bool qdisc_all_tx_empty(const struct net_device *dev)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b3bf53bc687..723a34710ad4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1553,6 +1553,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+/*
+ * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
+ * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ */
+void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+{
+	unsigned int real_num = dev->real_num_tx_queues;
+
+	if (unlikely(txq > dev->num_tx_queues))
+		;
+	else if (txq > real_num)
+		dev->real_num_tx_queues = txq;
+	else if (txq < real_num) {
+		dev->real_num_tx_queues = txq;
+		qdisc_reset_all_tx_gt(dev, txq);
+	}
+}
+EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
 static inline void __netif_reschedule(struct Qdisc *q)
 {
-- 
cgit v1.2.3-55-g7522


From 53d3176b282cc105493babb0fef36c8b873f6201 Mon Sep 17 00:00:00 2001
From: Changli Gao
Date: Sat, 10 Jul 2010 20:41:06 +0000
Subject: net: cleanups

remove useless blanks.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/inet_common.h |   55 ++++-------
 include/net/tcp.h         |  222 +++++++++++++++++-----------------------------
 include/net/udp.h         |   38 +++----
 3 files changed, 123 insertions(+), 192 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_common.h |  55 +++++-------
 include/net/tcp.h         | 222 ++++++++++++++++++----------------------------
 include/net/udp.h         |  38 ++++----
 3 files changed, 123 insertions(+), 192 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 18c773286b91..140c1ec9f8a2 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -1,8 +1,8 @@
 #ifndef _INET_COMMON_H
 #define _INET_COMMON_H
 
-extern const struct proto_ops		inet_stream_ops;
-extern const struct proto_ops		inet_dgram_ops;
+extern const struct proto_ops inet_stream_ops;
+extern const struct proto_ops inet_dgram_ops;
 
 /*
  *	INET4 prototypes used by INET6
@@ -13,37 +13,24 @@ struct sock;
 struct sockaddr;
 struct socket;
 
-extern int			inet_release(struct socket *sock);
-extern int			inet_stream_connect(struct socket *sock,
-						    struct sockaddr * uaddr,
-						    int addr_len, int flags);
-extern int			inet_dgram_connect(struct socket *sock, 
-						   struct sockaddr * uaddr,
-						   int addr_len, int flags);
-extern int			inet_accept(struct socket *sock, 
-					    struct socket *newsock, int flags);
-extern int			inet_sendmsg(struct kiocb *iocb,
-					     struct socket *sock, 
-					     struct msghdr *msg, 
-					     size_t size);
-extern int			inet_shutdown(struct socket *sock, int how);
-extern int			inet_listen(struct socket *sock, int backlog);
-
-extern void			inet_sock_destruct(struct sock *sk);
-
-extern int			inet_bind(struct socket *sock, 
-					  struct sockaddr *uaddr, int addr_len);
-extern int			inet_getname(struct socket *sock, 
-					     struct sockaddr *uaddr, 
-					     int *uaddr_len, int peer);
-extern int			inet_ioctl(struct socket *sock, 
-					   unsigned int cmd, unsigned long arg);
-
-extern int			inet_ctl_sock_create(struct sock **sk,
-						     unsigned short family,
-						     unsigned short type,
-						     unsigned char protocol,
-						     struct net *net);
+extern int inet_release(struct socket *sock);
+extern int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
+			       int addr_len, int flags);
+extern int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
+			      int addr_len, int flags);
+extern int inet_accept(struct socket *sock, struct socket *newsock, int flags);
+extern int inet_sendmsg(struct kiocb *iocb, struct socket *sock,
+			struct msghdr *msg, size_t size);
+extern int inet_shutdown(struct socket *sock, int how);
+extern int inet_listen(struct socket *sock, int backlog);
+extern void inet_sock_destruct(struct sock *sk);
+extern int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+extern int inet_getname(struct socket *sock, struct sockaddr *uaddr,
+			int *uaddr_len, int peer);
+extern int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+extern int inet_ctl_sock_create(struct sock **sk, unsigned short family,
+				unsigned short type, unsigned char protocol,
+				struct net *net);
 
 static inline void inet_ctl_sock_destroy(struct sock *sk)
 {
@@ -51,5 +38,3 @@ static inline void inet_ctl_sock_destroy(struct sock *sk)
 }
 
 #endif
-
-
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c2f96c2cc89c..33ce5242a17a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -296,45 +296,30 @@ extern struct proto tcp_prot;
 #define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val)
 #define TCP_ADD_STATS(net, field, val)	SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
 
-extern void			tcp_v4_err(struct sk_buff *skb, u32);
-
-extern void			tcp_shutdown (struct sock *sk, int how);
-
-extern int			tcp_v4_rcv(struct sk_buff *skb);
-
-extern int			tcp_v4_remember_stamp(struct sock *sk);
-
-extern int		    	tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
-
-extern int			tcp_sendmsg(struct kiocb *iocb, struct socket *sock,
-					    struct msghdr *msg, size_t size);
-extern ssize_t			tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
-
-extern int			tcp_ioctl(struct sock *sk, 
-					  int cmd, 
-					  unsigned long arg);
-
-extern int			tcp_rcv_state_process(struct sock *sk, 
-						      struct sk_buff *skb,
-						      struct tcphdr *th,
-						      unsigned len);
-
-extern int			tcp_rcv_established(struct sock *sk, 
-						    struct sk_buff *skb,
-						    struct tcphdr *th, 
-						    unsigned len);
-
-extern void			tcp_rcv_space_adjust(struct sock *sk);
-
-extern void			tcp_cleanup_rbuf(struct sock *sk, int copied);
-
-extern int			tcp_twsk_unique(struct sock *sk,
-						struct sock *sktw, void *twp);
-
-extern void			tcp_twsk_destructor(struct sock *sk);
-
-extern ssize_t			tcp_splice_read(struct socket *sk, loff_t *ppos,
-					        struct pipe_inode_info *pipe, size_t len, unsigned int flags);
+extern void tcp_v4_err(struct sk_buff *skb, u32);
+
+extern void tcp_shutdown (struct sock *sk, int how);
+
+extern int tcp_v4_rcv(struct sk_buff *skb);
+
+extern int tcp_v4_remember_stamp(struct sock *sk);
+extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
+extern int tcp_sendmsg(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *msg, size_t size);
+extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
+			    size_t size, int flags);
+extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+				 struct tcphdr *th, unsigned len);
+extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+			       struct tcphdr *th, unsigned len);
+extern void tcp_rcv_space_adjust(struct sock *sk);
+extern void tcp_cleanup_rbuf(struct sock *sk, int copied);
+extern int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
+extern void tcp_twsk_destructor(struct sock *sk);
+extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
+			       struct pipe_inode_info *pipe, size_t len,
+			       unsigned int flags);
 
 static inline void tcp_dec_quickack_mode(struct sock *sk,
 					 const unsigned int pkts)
@@ -372,88 +357,59 @@ enum tcp_tw_status {
 };
 
 
-extern enum tcp_tw_status	tcp_timewait_state_process(struct inet_timewait_sock *tw,
-							   struct sk_buff *skb,
-							   const struct tcphdr *th);
-
-extern struct sock *		tcp_check_req(struct sock *sk,struct sk_buff *skb,
-					      struct request_sock *req,
-					      struct request_sock **prev);
-extern int			tcp_child_process(struct sock *parent,
-						  struct sock *child,
-						  struct sk_buff *skb);
-extern int			tcp_use_frto(struct sock *sk);
-extern void			tcp_enter_frto(struct sock *sk);
-extern void			tcp_enter_loss(struct sock *sk, int how);
-extern void			tcp_clear_retrans(struct tcp_sock *tp);
-extern void			tcp_update_metrics(struct sock *sk);
-
-extern void			tcp_close(struct sock *sk, 
-					  long timeout);
-extern unsigned int		tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
-
-extern int			tcp_getsockopt(struct sock *sk, int level, 
-					       int optname,
-					       char __user *optval, 
-					       int __user *optlen);
-extern int			tcp_setsockopt(struct sock *sk, int level, 
-					       int optname, char __user *optval, 
-					       unsigned int optlen);
-extern int			compat_tcp_getsockopt(struct sock *sk,
-					int level, int optname,
-					char __user *optval, int __user *optlen);
-extern int			compat_tcp_setsockopt(struct sock *sk,
-					int level, int optname,
-					char __user *optval, unsigned int optlen);
-extern void			tcp_set_keepalive(struct sock *sk, int val);
-extern void			tcp_syn_ack_timeout(struct sock *sk,
-						    struct request_sock *req);
-extern int			tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
-					    struct msghdr *msg,
-					    size_t len, int nonblock, 
-					    int flags, int *addr_len);
-
-extern void			tcp_parse_options(struct sk_buff *skb,
-						  struct tcp_options_received *opt_rx,
-						  u8 **hvpp,
-						  int estab);
-
-extern u8			*tcp_parse_md5sig_option(struct tcphdr *th);
+extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
+						     struct sk_buff *skb,
+						     const struct tcphdr *th);
+extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
+				   struct request_sock *req,
+				   struct request_sock **prev);
+extern int tcp_child_process(struct sock *parent, struct sock *child,
+			     struct sk_buff *skb);
+extern int tcp_use_frto(struct sock *sk);
+extern void tcp_enter_frto(struct sock *sk);
+extern void tcp_enter_loss(struct sock *sk, int how);
+extern void tcp_clear_retrans(struct tcp_sock *tp);
+extern void tcp_update_metrics(struct sock *sk);
+extern void tcp_close(struct sock *sk, long timeout);
+extern unsigned int tcp_poll(struct file * file, struct socket *sock,
+			     struct poll_table_struct *wait);
+extern int tcp_getsockopt(struct sock *sk, int level, int optname,
+			  char __user *optval, int __user *optlen);
+extern int tcp_setsockopt(struct sock *sk, int level, int optname,
+			  char __user *optval, unsigned int optlen);
+extern int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
+				 char __user *optval, int __user *optlen);
+extern int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
+				 char __user *optval, unsigned int optlen);
+extern void tcp_set_keepalive(struct sock *sk, int val);
+extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req);
+extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		       size_t len, int nonblock, int flags, int *addr_len);
+extern void tcp_parse_options(struct sk_buff *skb,
+			      struct tcp_options_received *opt_rx, u8 **hvpp,
+			      int estab);
+extern u8 *tcp_parse_md5sig_option(struct tcphdr *th);
 
 /*
  *	TCP v4 functions exported for the inet6 API
  */
 
-extern void		       	tcp_v4_send_check(struct sock *sk,
-						  struct sk_buff *skb);
-
-extern int			tcp_v4_conn_request(struct sock *sk,
-						    struct sk_buff *skb);
-
-extern struct sock *		tcp_create_openreq_child(struct sock *sk,
-							 struct request_sock *req,
-							 struct sk_buff *skb);
-
-extern struct sock *		tcp_v4_syn_recv_sock(struct sock *sk,
-						     struct sk_buff *skb,
-						     struct request_sock *req,
-							struct dst_entry *dst);
-
-extern int			tcp_v4_do_rcv(struct sock *sk,
+extern void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
+extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
+extern struct sock * tcp_create_openreq_child(struct sock *sk,
+					      struct request_sock *req,
 					      struct sk_buff *skb);
-
-extern int			tcp_v4_connect(struct sock *sk,
-					       struct sockaddr *uaddr,
-					       int addr_len);
-
-extern int			tcp_connect(struct sock *sk);
-
-extern struct sk_buff *		tcp_make_synack(struct sock *sk,
-						struct dst_entry *dst,
-						struct request_sock *req,
-						struct request_values *rvp);
-
-extern int			tcp_disconnect(struct sock *sk, int flags);
+extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+					  struct request_sock *req,
+					  struct dst_entry *dst);
+extern int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
+extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
+			  int addr_len);
+extern int tcp_connect(struct sock *sk);
+extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
+					struct request_sock *req,
+					struct request_values *rvp);
+extern int tcp_disconnect(struct sock *sk, int flags);
 
 
 /* From syncookies.c */
@@ -485,10 +441,10 @@ extern int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int);
 
 extern void tcp_send_probe0(struct sock *);
 extern void tcp_send_partial(struct sock *);
-extern int  tcp_write_wakeup(struct sock *);
+extern int tcp_write_wakeup(struct sock *);
 extern void tcp_send_fin(struct sock *sk);
 extern void tcp_send_active_reset(struct sock *sk, gfp_t priority);
-extern int  tcp_send_synack(struct sock *);
+extern int tcp_send_synack(struct sock *);
 extern void tcp_push_one(struct sock *, unsigned int mss_now);
 extern void tcp_send_ack(struct sock *sk);
 extern void tcp_send_delayed_ack(struct sock *sk);
@@ -592,7 +548,7 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp)
  * scaling applied to the result.  The caller does these things
  * if necessary.  This is a "raw" window selection.
  */
-extern u32	__tcp_select_window(struct sock *sk);
+extern u32 __tcp_select_window(struct sock *sk);
 
 /* TCP timestamps are only 32-bits, this causes a slight
  * complication on 64-bit systems since we store a snapshot
@@ -1174,22 +1130,14 @@ struct tcp_md5sig_pool {
 #define TCP_MD5SIG_MAXKEYS	(~(u32)0)	/* really?! */
 
 /* - functions */
-extern int			tcp_v4_md5_hash_skb(char *md5_hash,
-						    struct tcp_md5sig_key *key,
-						    struct sock *sk,
-						    struct request_sock *req,
-						    struct sk_buff *skb);
-
-extern struct tcp_md5sig_key	*tcp_v4_md5_lookup(struct sock *sk,
-						   struct sock *addr_sk);
-
-extern int			tcp_v4_md5_do_add(struct sock *sk,
-						  __be32 addr,
-						  u8 *newkey,
-						  u8 newkeylen);
-
-extern int			tcp_v4_md5_do_del(struct sock *sk,
-						  __be32 addr);
+extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
+			       struct sock *sk, struct request_sock *req,
+			       struct sk_buff *skb);
+extern struct tcp_md5sig_key * tcp_v4_md5_lookup(struct sock *sk,
+						 struct sock *addr_sk);
+extern int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, u8 *newkey,
+			     u8 newkeylen);
+extern int tcp_v4_md5_do_del(struct sock *sk, __be32 addr);
 
 #ifdef CONFIG_TCP_MD5SIG
 #define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_keylen ? 		 \
@@ -1202,10 +1150,10 @@ extern int			tcp_v4_md5_do_del(struct sock *sk,
 #endif
 
 extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *);
-extern void			tcp_free_md5sig_pool(void);
+extern void tcp_free_md5sig_pool(void);
 
 extern struct tcp_md5sig_pool	*tcp_get_md5sig_pool(void);
-extern void			tcp_put_md5sig_pool(void);
+extern void tcp_put_md5sig_pool(void);
 
 extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *);
 extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *,
@@ -1433,7 +1381,7 @@ extern int tcp_gro_complete(struct sk_buff *skb);
 extern int tcp4_gro_complete(struct sk_buff *skb);
 
 #ifdef CONFIG_PROC_FS
-extern int  tcp4_proc_init(void);
+extern int tcp4_proc_init(void);
 extern void tcp4_proc_exit(void);
 #endif
 
diff --git a/include/net/udp.h b/include/net/udp.h
index 5348d80b25bb..7abdf305da50 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -157,30 +157,28 @@ static inline void udp_lib_close(struct sock *sk, long timeout)
 	sk_common_release(sk);
 }
 
-extern int	udp_lib_get_port(struct sock *sk, unsigned short snum,
-		int (*)(const struct sock *,const struct sock *),
-		unsigned int hash2_nulladdr);
+extern int udp_lib_get_port(struct sock *sk, unsigned short snum,
+			    int (*)(const struct sock *,const struct sock *),
+			    unsigned int hash2_nulladdr);
 
 /* net/ipv4/udp.c */
-extern int	udp_get_port(struct sock *sk, unsigned short snum,
-			     int (*saddr_cmp)(const struct sock *, const struct sock *));
-extern void	udp_err(struct sk_buff *, u32);
-
-extern int	udp_sendmsg(struct kiocb *iocb, struct sock *sk,
+extern int udp_get_port(struct sock *sk, unsigned short snum,
+			int (*saddr_cmp)(const struct sock *,
+					 const struct sock *));
+extern void udp_err(struct sk_buff *, u32);
+extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			    struct msghdr *msg, size_t len);
-extern void	udp_flush_pending_frames(struct sock *sk);
-
-extern int	udp_rcv(struct sk_buff *skb);
-extern int	udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
-extern int	udp_disconnect(struct sock *sk, int flags);
+extern void udp_flush_pending_frames(struct sock *sk);
+extern int udp_rcv(struct sk_buff *skb);
+extern int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+extern int udp_disconnect(struct sock *sk, int flags);
 extern unsigned int udp_poll(struct file *file, struct socket *sock,
 			     poll_table *wait);
-extern int 	udp_lib_getsockopt(struct sock *sk, int level, int optname,
-			           char __user *optval, int __user *optlen);
-extern int 	udp_lib_setsockopt(struct sock *sk, int level, int optname,
-				   char __user *optval, unsigned int optlen,
-				   int (*push_pending_frames)(struct sock *));
-
+extern int udp_lib_getsockopt(struct sock *sk, int level, int optname,
+			      char __user *optval, int __user *optlen);
+extern int udp_lib_setsockopt(struct sock *sk, int level, int optname,
+			      char __user *optval, unsigned int optlen,
+			      int (*push_pending_frames)(struct sock *));
 extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
 				    __be32 daddr, __be16 dport,
 				    int dif);
@@ -236,7 +234,7 @@ struct udp_iter_state {
 extern int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo);
 extern void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo);
 
-extern int  udp4_proc_init(void);
+extern int udp4_proc_init(void);
 extern void udp4_proc_exit(void);
 #endif
 
-- 
cgit v1.2.3-55-g7522


From 7ba42910073f8432934d61a6c08b1023c408fb62 Mon Sep 17 00:00:00 2001
From: Changli Gao
Date: Sat, 10 Jul 2010 20:41:55 +0000
Subject: inet, inet6: make tcp_sendmsg() and tcp_sendpage() through
 inet_sendmsg() and inet_sendpage()

a new boolean flag no_autobind is added to structure proto to avoid the autobind
calls when the protocol is TCP. Then sock_rps_record_flow() is called int the
TCP's sendmsg() and sendpage() pathes.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/inet_common.h |    4 ++++
 include/net/sock.h        |    1 +
 include/net/tcp.h         |    8 ++++----
 net/ipv4/af_inet.c        |   15 +++++++++------
 net/ipv4/tcp.c            |   11 +++++------
 net/ipv4/tcp_ipv4.c       |    3 +++
 net/ipv6/af_inet6.c       |    8 ++++----
 net/ipv6/tcp_ipv6.c       |    3 +++
 8 files changed, 33 insertions(+), 20 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_common.h |  4 ++++
 include/net/sock.h        |  1 +
 include/net/tcp.h         |  8 ++++----
 net/ipv4/af_inet.c        | 15 +++++++++------
 net/ipv4/tcp.c            | 11 +++++------
 net/ipv4/tcp_ipv4.c       |  3 +++
 net/ipv6/af_inet6.c       |  8 ++++----
 net/ipv6/tcp_ipv6.c       |  3 +++
 8 files changed, 33 insertions(+), 20 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 140c1ec9f8a2..22fac9892b16 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -21,6 +21,10 @@ extern int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
 extern int inet_accept(struct socket *sock, struct socket *newsock, int flags);
 extern int inet_sendmsg(struct kiocb *iocb, struct socket *sock,
 			struct msghdr *msg, size_t size);
+extern ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+			     size_t size, int flags);
+extern int inet_recvmsg(struct kiocb *iocb, struct socket *sock,
+			struct msghdr *msg, size_t size, int flags);
 extern int inet_shutdown(struct socket *sock, int how);
 extern int inet_listen(struct socket *sock, int backlog);
 extern void inet_sock_destruct(struct sock *sk);
diff --git a/include/net/sock.h b/include/net/sock.h
index 4f26f2f83be9..3100e71f0c3d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -772,6 +772,7 @@ struct proto {
 	int			*sysctl_wmem;
 	int			*sysctl_rmem;
 	int			max_header;
+	bool			no_autobind;
 
 	struct kmem_cache	*slab;
 	unsigned int		obj_size;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 33ce5242a17a..468b01f01c13 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -304,10 +304,10 @@ extern int tcp_v4_rcv(struct sk_buff *skb);
 
 extern int tcp_v4_remember_stamp(struct sock *sk);
 extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
-extern int tcp_sendmsg(struct kiocb *iocb, struct socket *sock,
-		       struct msghdr *msg, size_t size);
-extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
-			    size_t size, int flags);
+extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		       size_t size);
+extern int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+			size_t size, int flags);
 extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 struct tcphdr *th, unsigned len);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3ceb025b16f2..6a1100c25a9f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -727,28 +727,31 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	sock_rps_record_flow(sk);
 
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+	    inet_autobind(sk))
 		return -EAGAIN;
 
 	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
 }
 EXPORT_SYMBOL(inet_sendmsg);
 
-static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
-			     size_t size, int flags)
+ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+		      size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 
 	sock_rps_record_flow(sk);
 
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+	    inet_autobind(sk))
 		return -EAGAIN;
 
 	if (sk->sk_prot->sendpage)
 		return sk->sk_prot->sendpage(sk, page, offset, size, flags);
 	return sock_no_sendpage(sock, page, offset, size, flags);
 }
+EXPORT_SYMBOL(inet_sendpage);
 
 int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 		 size_t size, int flags)
@@ -894,10 +897,10 @@ const struct proto_ops inet_stream_ops = {
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
-	.sendmsg	   = tcp_sendmsg,
+	.sendmsg	   = inet_sendmsg,
 	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
-	.sendpage	   = tcp_sendpage,
+	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b8601b7683a6..9fce8a8a13aa 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -857,15 +857,15 @@ out_err:
 	return sk_stream_error(sk, flags, err);
 }
 
-ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
-		     size_t size, int flags)
+int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+		 size_t size, int flags)
 {
 	ssize_t res;
-	struct sock *sk = sock->sk;
 
 	if (!(sk->sk_route_caps & NETIF_F_SG) ||
 	    !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
-		return sock_no_sendpage(sock, page, offset, size, flags);
+		return sock_no_sendpage(sk->sk_socket, page, offset, size,
+					flags);
 
 	lock_sock(sk);
 	TCP_CHECK_TIMER(sk);
@@ -899,10 +899,9 @@ static inline int select_size(struct sock *sk, int sg)
 	return tmp;
 }
 
-int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		size_t size)
 {
-	struct sock *sk = sock->sk;
 	struct iovec *iov;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 44545e8e8c92..020766292bb0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2600,6 +2600,8 @@ struct proto tcp_prot = {
 	.setsockopt		= tcp_setsockopt,
 	.getsockopt		= tcp_getsockopt,
 	.recvmsg		= tcp_recvmsg,
+	.sendmsg		= tcp_sendmsg,
+	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v4_do_rcv,
 	.hash			= inet_hash,
 	.unhash			= inet_unhash,
@@ -2618,6 +2620,7 @@ struct proto tcp_prot = {
 	.twsk_prot		= &tcp_timewait_sock_ops,
 	.rsk_prot		= &tcp_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
+	.no_autobind		= true,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e830cd4f9d0f..56b9bf2516f4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -522,10 +522,10 @@ const struct proto_ops inet6_stream_ops = {
 	.shutdown	   = inet_shutdown,		/* ok		*/
 	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
-	.sendmsg	   = tcp_sendmsg,		/* ok		*/
-	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
+	.sendmsg	   = inet_sendmsg,		/* ok		*/
+	.recvmsg	   = inet_recvmsg,		/* ok		*/
 	.mmap		   = sock_no_mmap,
-	.sendpage	   = tcp_sendpage,
+	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
@@ -549,7 +549,7 @@ const struct proto_ops inet6_dgram_ops = {
 	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
 	.sendmsg	   = inet_sendmsg,		/* ok		*/
-	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
+	.recvmsg	   = inet_recvmsg,		/* ok		*/
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = sock_no_sendpage,
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5ebc27ecebdc..fe6d40418c0b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2142,6 +2142,8 @@ struct proto tcpv6_prot = {
 	.setsockopt		= tcp_setsockopt,
 	.getsockopt		= tcp_getsockopt,
 	.recvmsg		= tcp_recvmsg,
+	.sendmsg		= tcp_sendmsg,
+	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v6_do_rcv,
 	.hash			= tcp_v6_hash,
 	.unhash			= inet_unhash,
@@ -2160,6 +2162,7 @@ struct proto tcpv6_prot = {
 	.twsk_prot		= &tcp6_timewait_sock_ops,
 	.rsk_prot		= &tcp6_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
+	.no_autobind		= true,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
-- 
cgit v1.2.3-55-g7522


From b0f77d0eae0c58a5a9691a067ada112ceeae2d00 Mon Sep 17 00:00:00 2001
From: Tom Herbert
Date: Wed, 14 Jul 2010 20:50:29 -0700
Subject: net: fix problem in reading sock TX queue

Fix problem in reading the tx_queue recorded in a socket.  In
dev_pick_tx, the TX queue is read by doing a check with
sk_tx_queue_recorded on the socket, followed by a sk_tx_queue_get.
The problem is that there is not mutual exclusion across these
calls in the socket so it it is possible that the queue in the
sock can be invalidated after sk_tx_queue_recorded is called so
that sk_tx_queue get returns -1, which sets 65535 in queue_index
and thus dev_pick_tx returns 65536 which is a bogus queue and
can cause crash in dev_queue_xmit.

We fix this by only calling sk_tx_queue_get which does the proper
checks.  The interface is that sk_tx_queue_get returns the TX queue
if the sock argument is non-NULL and TX queue is recorded, else it
returns -1.  sk_tx_queue_recorded is no longer used so it can be
completely removed.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 7 +------
 net/core/dev.c     | 7 +++----
 2 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 731150d52799..0a691ea7654a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1224,12 +1224,7 @@ static inline void sk_tx_queue_clear(struct sock *sk)
 
 static inline int sk_tx_queue_get(const struct sock *sk)
 {
-	return sk->sk_tx_queue_mapping;
-}
-
-static inline bool sk_tx_queue_recorded(const struct sock *sk)
-{
-	return (sk && sk->sk_tx_queue_mapping >= 0);
+	return sk ? sk->sk_tx_queue_mapping : -1;
 }
 
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
diff --git a/net/core/dev.c b/net/core/dev.c
index 4b05fdf762ab..0ea10f849be8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2029,12 +2029,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
-	u16 queue_index;
+	int queue_index;
 	struct sock *sk = skb->sk;
 
-	if (sk_tx_queue_recorded(sk)) {
-		queue_index = sk_tx_queue_get(sk);
-	} else {
+	queue_index = sk_tx_queue_get(sk);
+	if (queue_index < 0) {
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		if (ops->ndo_select_queue) {
-- 
cgit v1.2.3-55-g7522


From f86586fa48a94c7a9c7f36650a2e0e4a947881c9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 15 Jul 2010 21:41:00 -0700
Subject: tcp: sizeof struct tcp_skb_cb is 44

Correct comment stating sizeof(struct tcp_skb_cb) is 36 or 40, since its
44 bytes, since commit 951dbc8ac714b04 ([IPV6]: Move nextheader offset
to the IP6CB).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 468b01f01c13..df6a2eb20193 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -570,11 +570,10 @@ extern u32 __tcp_select_window(struct sock *sk);
 #define TCPHDR_CWR 0x80
 
 /* This is what the send packet queuing engine uses to pass
- * TCP per-packet control information to the transmission
- * code.  We also store the host-order sequence numbers in
- * here too.  This is 36 bytes on 32-bit architectures,
- * 40 bytes on 64-bit machines, if this grows please adjust
- * skbuff.h:skbuff->cb[xxx] size appropriately.
+ * TCP per-packet control information to the transmission code.
+ * We also store the host-order sequence numbers in here too.
+ * This is 44 bytes if IPV6 is enabled.
+ * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
  */
 struct tcp_skb_cb {
 	union {
-- 
cgit v1.2.3-55-g7522


From f03585689fdff4ae256edd45a35bc2dd83d3684a Mon Sep 17 00:00:00 2001
From: Johan Hedberg
Date: Tue, 18 May 2010 13:20:32 +0200
Subject: Bluetooth: Add blacklist support for incoming connections

In some circumstances it could be desirable to reject incoming
connections on the baseband level. This patch adds this feature through
two new ioctl's: HCIBLOCKADDR and HCIUNBLOCKADDR. Both take a simple
Bluetooth address as a parameter. BDADDR_ANY can be used with
HCIUNBLOCKADDR to remove all devices from the blacklist.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 fs/compat_ioctl.c                |  2 +
 include/net/bluetooth/hci.h      |  3 ++
 include/net/bluetooth/hci_core.h |  9 ++++
 net/bluetooth/hci_core.c         |  3 ++
 net/bluetooth/hci_event.c        |  2 +-
 net/bluetooth/hci_sock.c         | 90 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 108 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 641640dc7ae5..18638969a659 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1328,6 +1328,8 @@ COMPATIBLE_IOCTL(HCISETLINKPOL)
 COMPATIBLE_IOCTL(HCISETLINKMODE)
 COMPATIBLE_IOCTL(HCISETACLMTU)
 COMPATIBLE_IOCTL(HCISETSCOMTU)
+COMPATIBLE_IOCTL(HCIBLOCKADDR)
+COMPATIBLE_IOCTL(HCIUNBLOCKADDR)
 COMPATIBLE_IOCTL(HCIINQUIRY)
 COMPATIBLE_IOCTL(HCIUARTSETPROTO)
 COMPATIBLE_IOCTL(HCIUARTGETPROTO)
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index fc0c502d9fd1..ca2518e0574e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -100,6 +100,9 @@ enum {
 #define HCISETACLMTU	_IOW('H', 227, int)
 #define HCISETSCOMTU	_IOW('H', 228, int)
 
+#define HCIBLOCKADDR	_IOW('H', 230, int)
+#define HCIUNBLOCKADDR	_IOW('H', 231, int)
+
 #define HCIINQUIRY	_IOR('H', 240, int)
 
 /* HCI timeouts */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index e42f6ed5421c..ffc637748b87 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -62,6 +62,11 @@ struct hci_conn_hash {
 	unsigned int     sco_num;
 };
 
+struct bdaddr_list {
+	struct list_head list;
+	bdaddr_t bdaddr;
+};
+
 struct hci_dev {
 	struct list_head list;
 	spinlock_t	lock;
@@ -127,6 +132,7 @@ struct hci_dev {
 
 	struct inquiry_cache	inq_cache;
 	struct hci_conn_hash	conn_hash;
+	struct bdaddr_list	blacklist;
 
 	struct hci_dev_stats	stat;
 
@@ -424,6 +430,9 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg);
 int hci_get_auth_info(struct hci_dev *hdev, void __user *arg);
 int hci_inquiry(void __user *arg);
 
+struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr);
+int hci_blacklist_clear(struct hci_dev *hdev);
+
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb);
 
 int hci_recv_frame(struct sk_buff *skb);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2f768de87011..aeb2982310a0 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -562,6 +562,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	hci_dev_lock_bh(hdev);
 	inquiry_cache_flush(hdev);
 	hci_conn_hash_flush(hdev);
+	hci_blacklist_clear(hdev);
 	hci_dev_unlock_bh(hdev);
 
 	hci_notify(hdev, HCI_DEV_DOWN);
@@ -923,6 +924,8 @@ int hci_register_dev(struct hci_dev *hdev)
 
 	hci_conn_hash_init(hdev);
 
+	INIT_LIST_HEAD(&hdev->blacklist.list);
+
 	memset(&hdev->stat, 0, sizeof(struct hci_dev_stats));
 
 	atomic_set(&hdev->promisc, 0);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 786b5de0bac4..43feeef3c498 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -952,7 +952,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
 
 	mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type);
 
-	if (mask & HCI_LM_ACCEPT) {
+	if ((mask & HCI_LM_ACCEPT) && !hci_blacklist_lookup(hdev, &ev->bdaddr)) {
 		/* Connection accepted */
 		struct inquiry_entry *ie;
 		struct hci_conn *conn;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 38f08f6b86f6..4f170a595934 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -165,6 +165,86 @@ static int hci_sock_release(struct socket *sock)
 	return 0;
 }
 
+struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr)
+{
+	struct list_head *p;
+	struct bdaddr_list *blacklist = &hdev->blacklist;
+
+	list_for_each(p, &blacklist->list) {
+		struct bdaddr_list *b;
+
+		b = list_entry(p, struct bdaddr_list, list);
+
+		if (bacmp(bdaddr, &b->bdaddr) == 0)
+			return b;
+	}
+
+	return NULL;
+}
+
+static int hci_blacklist_add(struct hci_dev *hdev, void __user *arg)
+{
+	bdaddr_t bdaddr;
+	struct bdaddr_list *entry;
+
+	if (copy_from_user(&bdaddr, arg, sizeof(bdaddr)))
+		return -EFAULT;
+
+	if (bacmp(&bdaddr, BDADDR_ANY) == 0)
+		return -EBADF;
+
+	if (hci_blacklist_lookup(hdev, &bdaddr))
+		return -EEXIST;
+
+	entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	bacpy(&entry->bdaddr, &bdaddr);
+
+	list_add(&entry->list, &hdev->blacklist.list);
+
+	return 0;
+}
+
+int hci_blacklist_clear(struct hci_dev *hdev)
+{
+	struct list_head *p, *n;
+	struct bdaddr_list *blacklist = &hdev->blacklist;
+
+	list_for_each_safe(p, n, &blacklist->list) {
+		struct bdaddr_list *b;
+
+		b = list_entry(p, struct bdaddr_list, list);
+
+		list_del(p);
+		kfree(b);
+	}
+
+	return 0;
+}
+
+static int hci_blacklist_del(struct hci_dev *hdev, void __user *arg)
+{
+	bdaddr_t bdaddr;
+	struct bdaddr_list *entry;
+
+	if (copy_from_user(&bdaddr, arg, sizeof(bdaddr)))
+		return -EFAULT;
+
+	if (bacmp(&bdaddr, BDADDR_ANY) == 0)
+		return hci_blacklist_clear(hdev);
+
+	entry = hci_blacklist_lookup(hdev, &bdaddr);
+	if (!entry)
+		return -ENOENT;
+
+	list_del(&entry->list);
+	kfree(entry);
+
+	return 0;
+}
+
 /* Ioctls that require bound socket */
 static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
 {
@@ -194,6 +274,16 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign
 	case HCIGETAUTHINFO:
 		return hci_get_auth_info(hdev, (void __user *) arg);
 
+	case HCIBLOCKADDR:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+		return hci_blacklist_add(hdev, (void __user *) arg);
+
+	case HCIUNBLOCKADDR:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+		return hci_blacklist_del(hdev, (void __user *) arg);
+
 	default:
 		if (hdev->ioctl)
 			return hdev->ioctl(hdev, cmd, arg);
-- 
cgit v1.2.3-55-g7522


From 04fafe4ed76f5547856f4c464bec5bf948afb498 Mon Sep 17 00:00:00 2001
From: Ron Shaffer
Date: Fri, 28 May 2010 11:53:45 -0400
Subject: Bluetooth: Remove extraneous white space

Deleted extraneous white space from the end of several lines

Signed-off-by: Ron Shaffer <rshaffer@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ffc637748b87..ab6d6186a460 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1,4 +1,4 @@
-/* 
+/*
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
 
@@ -12,13 +12,13 @@
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
    IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
-   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES 
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
-   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, 
-   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS 
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
    SOFTWARE IS DISCLAIMED.
 */
 
@@ -386,7 +386,7 @@ static inline void __hci_dev_put(struct hci_dev *d)
 }
 
 static inline void hci_dev_put(struct hci_dev *d)
-{ 
+{
 	__hci_dev_put(d);
 	module_put(d->owner);
 }
-- 
cgit v1.2.3-55-g7522


From 2d0a03460a8a0c611843500735096ff799aa8510 Mon Sep 17 00:00:00 2001
From: Ron Shaffer
Date: Fri, 28 May 2010 11:53:46 -0400
Subject: Bluetooth: Reassigned copyright to Code Aurora Forum

Qualcomm, Inc. has reassigned rights to Code Aurora Forum. Accordingly,
as files are modified by Code Aurora Forum members, the copyright
statement will be updated.

Signed-off-by: Ron Shaffer <rshaffer@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 2 +-
 net/bluetooth/hci_conn.c         | 2 +-
 net/bluetooth/hci_event.c        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ab6d6186a460..600372d44578 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1,6 +1,6 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
-   Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 800b6b9fbbae..e9fef83449f8 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1,6 +1,6 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
-   Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 43feeef3c498..a96980022305 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1,6 +1,6 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
-   Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
-- 
cgit v1.2.3-55-g7522


From cf6c2c0b9f47ee3cd12684b905725c8376d52135 Mon Sep 17 00:00:00 2001
From: Gustavo F. Padovan
Date: Mon, 7 Jun 2010 20:54:45 -0300
Subject: Bluetooth: Disconnect early if mode is not supported

When mode is mandatory we shall not send connect request and report this
to the userspace as well.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h |  5 ++++
 net/bluetooth/l2cap.c         | 56 ++++++++++++++++++++++++++++++-------------
 2 files changed, 45 insertions(+), 16 deletions(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 7c695bfd853c..f8bae5415438 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -287,6 +287,11 @@ struct l2cap_conn {
 	struct l2cap_chan_list chan_list;
 };
 
+struct sock_del_list {
+	struct sock *sk;
+	struct list_head list;
+};
+
 #define L2CAP_INFO_CL_MTU_REQ_SENT	0x01
 #define L2CAP_INFO_FEAT_MASK_REQ_SENT	0x04
 #define L2CAP_INFO_FEAT_MASK_REQ_DONE	0x08
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 2fb45c481762..6a33d269389e 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -456,6 +456,22 @@ static void l2cap_do_start(struct sock *sk)
 	}
 }
 
+static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
+{
+	u32 local_feat_mask = l2cap_feat_mask;
+	if (enable_ertm)
+		local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
+
+	switch (mode) {
+	case L2CAP_MODE_ERTM:
+		return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask;
+	case L2CAP_MODE_STREAMING:
+		return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask;
+	default:
+		return 0x00;
+	}
+}
+
 static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err)
 {
 	struct l2cap_disconn_req req;
@@ -484,10 +500,13 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int
 static void l2cap_conn_start(struct l2cap_conn *conn)
 {
 	struct l2cap_chan_list *l = &conn->chan_list;
+	struct sock_del_list del, *tmp1, *tmp2;
 	struct sock *sk;
 
 	BT_DBG("conn %p", conn);
 
+	INIT_LIST_HEAD(&del.list);
+
 	read_lock(&l->lock);
 
 	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
@@ -503,6 +522,19 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 			if (l2cap_check_security(sk) &&
 					__l2cap_no_conn_pending(sk)) {
 				struct l2cap_conn_req req;
+
+				if (!l2cap_mode_supported(l2cap_pi(sk)->mode,
+						conn->feat_mask)
+						&& l2cap_pi(sk)->conf_state &
+						L2CAP_CONF_STATE2_DEVICE) {
+					tmp1 = kzalloc(sizeof(struct srej_list),
+							GFP_ATOMIC);
+					tmp1->sk = sk;
+					list_add_tail(&tmp1->list, &del.list);
+					bh_unlock_sock(sk);
+					continue;
+				}
+
 				req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 				req.psm  = l2cap_pi(sk)->psm;
 
@@ -542,6 +574,14 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 	}
 
 	read_unlock(&l->lock);
+
+	list_for_each_entry_safe(tmp1, tmp2, &del.list, list) {
+		bh_lock_sock(tmp1->sk);
+		__l2cap_sock_close(tmp1->sk, ECONNRESET);
+		bh_unlock_sock(tmp1->sk);
+		list_del(&tmp1->list);
+		kfree(tmp1);
+	}
 }
 
 static void l2cap_conn_ready(struct l2cap_conn *conn)
@@ -2429,22 +2469,6 @@ static inline void l2cap_ertm_init(struct sock *sk)
 	INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work);
 }
 
-static int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
-{
-	u32 local_feat_mask = l2cap_feat_mask;
-	if (enable_ertm)
-		local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
-
-	switch (mode) {
-	case L2CAP_MODE_ERTM:
-		return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask;
-	case L2CAP_MODE_STREAMING:
-		return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask;
-	default:
-		return 0x00;
-	}
-}
-
 static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask)
 {
 	switch (mode) {
-- 
cgit v1.2.3-55-g7522


From e0f66218b3a7d0bcf37ca95186123c257fda0ba5 Mon Sep 17 00:00:00 2001
From: Gustavo F. Padovan
Date: Mon, 21 Jun 2010 18:50:49 -0300
Subject: Bluetooth: Remove the send_lock spinlock from ERTM

Using a lock to deal with the ERTM race condition - interruption with
new data from the hci layer - is wrong. We should use the native skb
backlog queue.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h |  1 -
 net/bluetooth/l2cap.c         | 28 +---------------------------
 2 files changed, 1 insertion(+), 28 deletions(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index f8bae5415438..7df70e47c878 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -358,7 +358,6 @@ struct l2cap_pinfo {
 
 	__le16		sport;
 
-	spinlock_t		send_lock;
 	struct timer_list	retrans_timer;
 	struct timer_list	monitor_timer;
 	struct timer_list	ack_timer;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index f6e46fdddd2b..dc8601fc2404 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1562,16 +1562,11 @@ static int l2cap_retransmit_frames(struct sock *sk)
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	int ret;
 
-	spin_lock_bh(&pi->send_lock);
-
 	if (!skb_queue_empty(TX_QUEUE(sk)))
 		sk->sk_send_head = TX_QUEUE(sk)->next;
 
 	pi->next_tx_seq = pi->expected_ack_seq;
 	ret = l2cap_ertm_send(sk);
-
-	spin_unlock_bh(&pi->send_lock);
-
 	return ret;
 }
 
@@ -1579,7 +1574,6 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi)
 {
 	struct sock *sk = (struct sock *)pi;
 	u16 control = 0;
-	int nframes;
 
 	control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
 
@@ -1590,11 +1584,7 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi)
 		return;
 	}
 
-	spin_lock_bh(&pi->send_lock);
-	nframes = l2cap_ertm_send(sk);
-	spin_unlock_bh(&pi->send_lock);
-
-	if (nframes > 0)
+	if (l2cap_ertm_send(sk) > 0)
 		return;
 
 	control |= L2CAP_SUPER_RCV_READY;
@@ -1789,10 +1779,8 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
 		size += buflen;
 	}
 	skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk));
-	spin_lock_bh(&pi->send_lock);
 	if (sk->sk_send_head == NULL)
 		sk->sk_send_head = sar_queue.next;
-	spin_unlock_bh(&pi->send_lock);
 
 	return size;
 }
@@ -1864,14 +1852,9 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 			}
 			__skb_queue_tail(TX_QUEUE(sk), skb);
 
-			if (pi->mode == L2CAP_MODE_ERTM)
-				spin_lock_bh(&pi->send_lock);
-
 			if (sk->sk_send_head == NULL)
 				sk->sk_send_head = skb;
 
-			if (pi->mode == L2CAP_MODE_ERTM)
-				spin_unlock_bh(&pi->send_lock);
 		} else {
 		/* Segment SDU into multiples PDUs */
 			err = l2cap_sar_segment_sdu(sk, msg, len);
@@ -1887,9 +1870,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 				err = len;
 				break;
 			}
-			spin_lock_bh(&pi->send_lock);
 			err = l2cap_ertm_send(sk);
-			spin_unlock_bh(&pi->send_lock);
 		}
 
 		if (err >= 0)
@@ -2464,7 +2445,6 @@ static inline void l2cap_ertm_init(struct sock *sk)
 
 	__skb_queue_head_init(SREJ_QUEUE(sk));
 	__skb_queue_head_init(BUSY_QUEUE(sk));
-	spin_lock_init(&l2cap_pi(sk)->send_lock);
 
 	INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work);
 }
@@ -3462,9 +3442,7 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 	if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY)
 		l2cap_retransmit_frames(sk);
 
-	spin_lock_bh(&pi->send_lock);
 	l2cap_ertm_send(sk);
-	spin_unlock_bh(&pi->send_lock);
 
 	if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) &&
 			pi->frames_sent == 0) {
@@ -4066,9 +4044,7 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 		if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
 			l2cap_send_ack(pi);
 		} else {
-			spin_lock_bh(&pi->send_lock);
 			l2cap_ertm_send(sk);
-			spin_unlock_bh(&pi->send_lock);
 		}
 	}
 }
@@ -4113,9 +4089,7 @@ static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control)
 		pi->conn_state |= L2CAP_CONN_SEND_FBIT;
 		l2cap_retransmit_one_frame(sk, tx_seq);
 
-		spin_lock_bh(&pi->send_lock);
 		l2cap_ertm_send(sk);
-		spin_unlock_bh(&pi->send_lock);
 
 		if (pi->conn_state & L2CAP_CONN_WAIT_F) {
 			pi->srej_save_reqseq = tx_seq;
-- 
cgit v1.2.3-55-g7522


From ce5706bd69be6b25715ed6cd48a210b5080032bc Mon Sep 17 00:00:00 2001
From: Gustavo F. Padovan
Date: Tue, 13 Jul 2010 11:57:11 -0300
Subject: Bluetooth: Add Copyright notice to L2CAP

Copyright for the time I worked on L2CAP during the Google Summer of Code
program.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 +
 net/bluetooth/l2cap.c         | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 7df70e47c878..d0303021aba7 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -1,6 +1,7 @@
 /* 
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 449cbdd4ddbb..67a6f59873aa 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1,6 +1,7 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
-- 
cgit v1.2.3-55-g7522


From cd4c53919ed50b0d532f106aeb76e79077bece98 Mon Sep 17 00:00:00 2001
From: Suraj Sumangala
Date: Wed, 14 Jul 2010 13:02:16 +0530
Subject: Bluetooth: Add one more buffer for HCI stream reassembly

Additional reassembly buffer to keep track of stream reasembly

Signed-off-by: Suraj Sumangala <suraj@atheros.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 4 ++--
 net/bluetooth/hci_core.c         | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 600372d44578..28e5eeefdec8 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -66,7 +66,7 @@ struct bdaddr_list {
 	struct list_head list;
 	bdaddr_t bdaddr;
 };
-
+#define NUM_REASSEMBLY 4
 struct hci_dev {
 	struct list_head list;
 	spinlock_t	lock;
@@ -123,7 +123,7 @@ struct hci_dev {
 	struct sk_buff_head	cmd_q;
 
 	struct sk_buff		*sent_cmd;
-	struct sk_buff		*reassembly[3];
+	struct sk_buff		*reassembly[NUM_REASSEMBLY];
 
 	struct mutex		req_lock;
 	wait_queue_head_t	req_wait_q;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index aeb2982310a0..0ded790bfb67 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -914,7 +914,7 @@ int hci_register_dev(struct hci_dev *hdev)
 	skb_queue_head_init(&hdev->cmd_q);
 	skb_queue_head_init(&hdev->raw_q);
 
-	for (i = 0; i < 3; i++)
+	for (i = 0; i < NUM_REASSEMBLY; i++)
 		hdev->reassembly[i] = NULL;
 
 	init_waitqueue_head(&hdev->req_wait_q);
@@ -973,7 +973,7 @@ int hci_unregister_dev(struct hci_dev *hdev)
 
 	hci_dev_do_close(hdev);
 
-	for (i = 0; i < 3; i++)
+	for (i = 0; i < NUM_REASSEMBLY; i++)
 		kfree_skb(hdev->reassembly[i]);
 
 	hci_notify(hdev, HCI_DEV_UNREG);
@@ -1034,7 +1034,7 @@ int hci_recv_frame(struct sk_buff *skb)
 EXPORT_SYMBOL(hci_recv_frame);
 
 /* Receive packet type fragment */
-#define __reassembly(hdev, type)  ((hdev)->reassembly[(type) - 2])
+#define __reassembly(hdev, type)  ((hdev)->reassembly[(type) - 1])
 
 int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
 {
-- 
cgit v1.2.3-55-g7522


From 33e882a5f2301a23a85ef2994e30fd9f48d39d9b Mon Sep 17 00:00:00 2001
From: Suraj Sumangala
Date: Wed, 14 Jul 2010 13:02:17 +0530
Subject: Bluetooth: Implement hci_reassembly helper to reassemble RX packets

Implements feature to reassemble received HCI frames from any input stream

Signed-off-by: Suraj Sumangala <suraj@atheros.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h |   1 +
 net/bluetooth/hci_core.c          | 109 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index ff77e8f882f1..d6b150c679ff 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -138,6 +138,7 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock);
 struct bt_skb_cb {
 	__u8 pkt_type;
 	__u8 incoming;
+	__u16 expect;
 	__u8 tx_seq;
 	__u8 retries;
 	__u8 sar;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0ded790bfb67..477c4a60a079 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1033,6 +1033,115 @@ int hci_recv_frame(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(hci_recv_frame);
 
+static int hci_reassembly(struct hci_dev *hdev, int type, void *data,
+			  int count, __u8 index, gfp_t gfp_mask)
+{
+	int len = 0;
+	int hlen = 0;
+	int remain = count;
+	struct sk_buff *skb;
+	struct bt_skb_cb *scb;
+
+	if ((type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) ||
+				index >= NUM_REASSEMBLY)
+		return -EILSEQ;
+
+	skb = hdev->reassembly[index];
+
+	if (!skb) {
+		switch (type) {
+		case HCI_ACLDATA_PKT:
+			len = HCI_MAX_FRAME_SIZE;
+			hlen = HCI_ACL_HDR_SIZE;
+			break;
+		case HCI_EVENT_PKT:
+			len = HCI_MAX_EVENT_SIZE;
+			hlen = HCI_EVENT_HDR_SIZE;
+			break;
+		case HCI_SCODATA_PKT:
+			len = HCI_MAX_SCO_SIZE;
+			hlen = HCI_SCO_HDR_SIZE;
+			break;
+		}
+
+		skb = bt_skb_alloc(len, gfp_mask);
+		if (!skb)
+			return -ENOMEM;
+
+		scb = (void *) skb->cb;
+		scb->expect = hlen;
+		scb->pkt_type = type;
+
+		skb->dev = (void *) hdev;
+		hdev->reassembly[index] = skb;
+	}
+
+	while (count) {
+		scb = (void *) skb->cb;
+		len = min(scb->expect, (__u16)count);
+
+		memcpy(skb_put(skb, len), data, len);
+
+		count -= len;
+		data += len;
+		scb->expect -= len;
+		remain = count;
+
+		switch (type) {
+		case HCI_EVENT_PKT:
+			if (skb->len == HCI_EVENT_HDR_SIZE) {
+				struct hci_event_hdr *h = hci_event_hdr(skb);
+				scb->expect = h->plen;
+
+				if (skb_tailroom(skb) < scb->expect) {
+					kfree_skb(skb);
+					hdev->reassembly[index] = NULL;
+					return -ENOMEM;
+				}
+			}
+			break;
+
+		case HCI_ACLDATA_PKT:
+			if (skb->len  == HCI_ACL_HDR_SIZE) {
+				struct hci_acl_hdr *h = hci_acl_hdr(skb);
+				scb->expect = __le16_to_cpu(h->dlen);
+
+				if (skb_tailroom(skb) < scb->expect) {
+					kfree_skb(skb);
+					hdev->reassembly[index] = NULL;
+					return -ENOMEM;
+				}
+			}
+			break;
+
+		case HCI_SCODATA_PKT:
+			if (skb->len == HCI_SCO_HDR_SIZE) {
+				struct hci_sco_hdr *h = hci_sco_hdr(skb);
+				scb->expect = h->dlen;
+
+				if (skb_tailroom(skb) < scb->expect) {
+					kfree_skb(skb);
+					hdev->reassembly[index] = NULL;
+					return -ENOMEM;
+				}
+			}
+			break;
+		}
+
+		if (scb->expect == 0) {
+			/* Complete frame */
+
+			bt_cb(skb)->pkt_type = type;
+			hci_recv_frame(skb);
+
+			hdev->reassembly[index] = NULL;
+			return remain;
+		}
+	}
+
+	return remain;
+}
+
 /* Receive packet type fragment */
 #define __reassembly(hdev, type)  ((hdev)->reassembly[(type) - 1])
 
-- 
cgit v1.2.3-55-g7522


From 9981151086385eecc2febf4ba95a14593f834b3d Mon Sep 17 00:00:00 2001
From: Suraj Sumangala
Date: Wed, 14 Jul 2010 13:02:19 +0530
Subject: Bluetooth: Implemented HCI frame reassembly for RX from stream

Implemented frame reassembly implementation for reassembling fragments
received from stream.

Signed-off-by: Suraj Sumangala <suraj@atheros.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_core.c         | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 28e5eeefdec8..350b3e6964bd 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -437,6 +437,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb);
 
 int hci_recv_frame(struct sk_buff *skb);
 int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count);
+int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count);
 
 int hci_register_sysfs(struct hci_dev *hdev);
 void hci_unregister_sysfs(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 451e266840ad..995c9f9b84d0 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1163,6 +1163,41 @@ int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
 }
 EXPORT_SYMBOL(hci_recv_fragment);
 
+#define STREAM_REASSEMBLY 0
+
+int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count)
+{
+	int type;
+	int rem = 0;
+
+	do {
+		struct sk_buff *skb = hdev->reassembly[STREAM_REASSEMBLY];
+
+		if (!skb) {
+			struct { char type; } *pkt;
+
+			/* Start of the frame */
+			pkt = data;
+			type = pkt->type;
+
+			data++;
+			count--;
+		} else
+			type = bt_cb(skb)->pkt_type;
+
+		rem = hci_reassembly(hdev, type, data,
+					count, STREAM_REASSEMBLY, GFP_ATOMIC);
+		if (rem < 0)
+			return rem;
+
+		data += (count - rem);
+		count = rem;
+	} while (count);
+
+	return rem;
+}
+EXPORT_SYMBOL(hci_recv_stream_fragment);
+
 /* ---- Interface to upper protocols ---- */
 
 /* Register/Unregister protocols.
-- 
cgit v1.2.3-55-g7522


From 5d8868ff3d11e3fc5a5c07477f281a16c71714a3 Mon Sep 17 00:00:00 2001
From: Gustavo F. Padovan
Date: Fri, 16 Jul 2010 16:18:39 -0300
Subject: Bluetooth: Add Google's copyright to L2CAP

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 +
 net/bluetooth/l2cap.c         | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index d0303021aba7..4b86c783976c 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -2,6 +2,7 @@
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
    Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
+   Copyright (C) 2010 Google Inc.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index cdd608d72741..d175cc262833 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2,6 +2,7 @@
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
    Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
+   Copyright (C) 2010 Google Inc.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
-- 
cgit v1.2.3-55-g7522


From 66c853cc21bd387a9a2109dcf3b3d53cc9ee9edf Mon Sep 17 00:00:00 2001
From: Gustavo F. Padovan
Date: Mon, 19 Jul 2010 02:00:13 -0300
Subject: Bluetooth: Use __packed annotation

To make net/ and include/net/ code consistent use __packed instead of
__attribute__ ((packed)). Bluetooth subsystem was one of the last net
subsys still using __attribute__ ((packed)).

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h |   2 +-
 include/net/bluetooth/hci.h       | 184 +++++++++++++++++++-------------------
 include/net/bluetooth/l2cap.h     |  26 +++---
 include/net/bluetooth/rfcomm.h    |  14 +--
 4 files changed, 113 insertions(+), 113 deletions(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index d6b150c679ff..27a902d9b3a9 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -84,7 +84,7 @@ enum {
 /* BD Address */
 typedef struct {
 	__u8 b[6];
-} __attribute__((packed)) bdaddr_t;
+} __packed bdaddr_t;
 
 #define BDADDR_ANY   (&(bdaddr_t) {{0, 0, 0, 0, 0, 0}})
 #define BDADDR_LOCAL (&(bdaddr_t) {{0, 0, 0, 0xff, 0xff, 0xff}})
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index ca2518e0574e..bcbdd6d4e6dd 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -230,7 +230,7 @@ struct hci_cp_inquiry {
 	__u8     lap[3];
 	__u8     length;
 	__u8     num_rsp;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_INQUIRY_CANCEL		0x0402
 
@@ -244,81 +244,81 @@ struct hci_cp_create_conn {
 	__u8     pscan_mode;
 	__le16   clock_offset;
 	__u8     role_switch;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_DISCONNECT		0x0406
 struct hci_cp_disconnect {
 	__le16   handle;
 	__u8     reason;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_ADD_SCO			0x0407
 struct hci_cp_add_sco {
 	__le16   handle;
 	__le16   pkt_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_CREATE_CONN_CANCEL	0x0408
 struct hci_cp_create_conn_cancel {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_ACCEPT_CONN_REQ		0x0409
 struct hci_cp_accept_conn_req {
 	bdaddr_t bdaddr;
 	__u8     role;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_REJECT_CONN_REQ		0x040a
 struct hci_cp_reject_conn_req {
 	bdaddr_t bdaddr;
 	__u8     reason;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_LINK_KEY_REPLY		0x040b
 struct hci_cp_link_key_reply {
 	bdaddr_t bdaddr;
 	__u8     link_key[16];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_LINK_KEY_NEG_REPLY	0x040c
 struct hci_cp_link_key_neg_reply {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_PIN_CODE_REPLY		0x040d
 struct hci_cp_pin_code_reply {
 	bdaddr_t bdaddr;
 	__u8     pin_len;
 	__u8     pin_code[16];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_PIN_CODE_NEG_REPLY	0x040e
 struct hci_cp_pin_code_neg_reply {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_CHANGE_CONN_PTYPE	0x040f
 struct hci_cp_change_conn_ptype {
 	__le16   handle;
 	__le16   pkt_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_AUTH_REQUESTED		0x0411
 struct hci_cp_auth_requested {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_SET_CONN_ENCRYPT		0x0413
 struct hci_cp_set_conn_encrypt {
 	__le16   handle;
 	__u8     encrypt;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_CHANGE_CONN_LINK_KEY	0x0415
 struct hci_cp_change_conn_link_key {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_REMOTE_NAME_REQ		0x0419
 struct hci_cp_remote_name_req {
@@ -326,28 +326,28 @@ struct hci_cp_remote_name_req {
 	__u8     pscan_rep_mode;
 	__u8     pscan_mode;
 	__le16   clock_offset;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_REMOTE_NAME_REQ_CANCEL	0x041a
 struct hci_cp_remote_name_req_cancel {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_REMOTE_FEATURES	0x041b
 struct hci_cp_read_remote_features {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_REMOTE_EXT_FEATURES	0x041c
 struct hci_cp_read_remote_ext_features {
 	__le16   handle;
 	__u8     page;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_REMOTE_VERSION	0x041d
 struct hci_cp_read_remote_version {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_SETUP_SYNC_CONN		0x0428
 struct hci_cp_setup_sync_conn {
@@ -358,7 +358,7 @@ struct hci_cp_setup_sync_conn {
 	__le16   voice_setting;
 	__u8     retrans_effort;
 	__le16   pkt_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_ACCEPT_SYNC_CONN_REQ	0x0429
 struct hci_cp_accept_sync_conn_req {
@@ -369,13 +369,13 @@ struct hci_cp_accept_sync_conn_req {
 	__le16   content_format;
 	__u8     retrans_effort;
 	__le16   pkt_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_REJECT_SYNC_CONN_REQ	0x042a
 struct hci_cp_reject_sync_conn_req {
 	bdaddr_t bdaddr;
 	__u8     reason;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_SNIFF_MODE		0x0803
 struct hci_cp_sniff_mode {
@@ -384,59 +384,59 @@ struct hci_cp_sniff_mode {
 	__le16   min_interval;
 	__le16   attempt;
 	__le16   timeout;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_EXIT_SNIFF_MODE		0x0804
 struct hci_cp_exit_sniff_mode {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_ROLE_DISCOVERY		0x0809
 struct hci_cp_role_discovery {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 struct hci_rp_role_discovery {
 	__u8     status;
 	__le16   handle;
 	__u8     role;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_SWITCH_ROLE		0x080b
 struct hci_cp_switch_role {
 	bdaddr_t bdaddr;
 	__u8     role;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_LINK_POLICY		0x080c
 struct hci_cp_read_link_policy {
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 struct hci_rp_read_link_policy {
 	__u8     status;
 	__le16   handle;
 	__le16   policy;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_WRITE_LINK_POLICY	0x080d
 struct hci_cp_write_link_policy {
 	__le16   handle;
 	__le16   policy;
-} __attribute__ ((packed));
+} __packed;
 struct hci_rp_write_link_policy {
 	__u8     status;
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_DEF_LINK_POLICY	0x080e
 struct hci_rp_read_def_link_policy {
 	__u8     status;
 	__le16   policy;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_WRITE_DEF_LINK_POLICY	0x080f
 struct hci_cp_write_def_link_policy {
 	__le16   policy;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_SNIFF_SUBRATE		0x0811
 struct hci_cp_sniff_subrate {
@@ -444,12 +444,12 @@ struct hci_cp_sniff_subrate {
 	__le16   max_latency;
 	__le16   min_remote_timeout;
 	__le16   min_local_timeout;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_SET_EVENT_MASK		0x0c01
 struct hci_cp_set_event_mask {
 	__u8     mask[8];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_RESET			0x0c03
 
@@ -458,7 +458,7 @@ struct hci_cp_set_event_flt {
 	__u8     flt_type;
 	__u8     cond_type;
 	__u8     condition[0];
-} __attribute__ ((packed));
+} __packed;
 
 /* Filter types */
 #define HCI_FLT_CLEAR_ALL	0x00
@@ -477,13 +477,13 @@ struct hci_cp_set_event_flt {
 #define HCI_OP_WRITE_LOCAL_NAME		0x0c13
 struct hci_cp_write_local_name {
 	__u8     name[248];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_LOCAL_NAME		0x0c14
 struct hci_rp_read_local_name {
 	__u8     status;
 	__u8     name[248];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_WRITE_CA_TIMEOUT		0x0c16
 
@@ -511,23 +511,23 @@ struct hci_rp_read_local_name {
 struct hci_rp_read_class_of_dev {
 	__u8     status;
 	__u8     dev_class[3];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_WRITE_CLASS_OF_DEV	0x0c24
 struct hci_cp_write_class_of_dev {
 	__u8     dev_class[3];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_VOICE_SETTING	0x0c25
 struct hci_rp_read_voice_setting {
 	__u8     status;
 	__le16   voice_setting;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_WRITE_VOICE_SETTING	0x0c26
 struct hci_cp_write_voice_setting {
 	__le16   voice_setting;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_HOST_BUFFER_SIZE		0x0c33
 struct hci_cp_host_buffer_size {
@@ -535,18 +535,18 @@ struct hci_cp_host_buffer_size {
 	__u8     sco_mtu;
 	__le16   acl_max_pkt;
 	__le16   sco_max_pkt;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_SSP_MODE		0x0c55
 struct hci_rp_read_ssp_mode {
 	__u8     status;
 	__u8     mode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_WRITE_SSP_MODE		0x0c56
 struct hci_cp_write_ssp_mode {
 	__u8     mode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_LOCAL_VERSION	0x1001
 struct hci_rp_read_local_version {
@@ -556,19 +556,19 @@ struct hci_rp_read_local_version {
 	__u8     lmp_ver;
 	__le16   manufacturer;
 	__le16   lmp_subver;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_LOCAL_COMMANDS	0x1002
 struct hci_rp_read_local_commands {
 	__u8     status;
 	__u8     commands[64];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_LOCAL_FEATURES	0x1003
 struct hci_rp_read_local_features {
 	__u8     status;
 	__u8     features[8];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_LOCAL_EXT_FEATURES	0x1004
 struct hci_rp_read_local_ext_features {
@@ -576,7 +576,7 @@ struct hci_rp_read_local_ext_features {
 	__u8     page;
 	__u8     max_page;
 	__u8     features[8];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_BUFFER_SIZE		0x1005
 struct hci_rp_read_buffer_size {
@@ -585,13 +585,13 @@ struct hci_rp_read_buffer_size {
 	__u8     sco_mtu;
 	__le16   acl_max_pkt;
 	__le16   sco_max_pkt;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_OP_READ_BD_ADDR		0x1009
 struct hci_rp_read_bd_addr {
 	__u8     status;
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
@@ -604,7 +604,7 @@ struct inquiry_info {
 	__u8     pscan_mode;
 	__u8     dev_class[3];
 	__le16   clock_offset;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_CONN_COMPLETE		0x03
 struct hci_ev_conn_complete {
@@ -613,54 +613,54 @@ struct hci_ev_conn_complete {
 	bdaddr_t bdaddr;
 	__u8     link_type;
 	__u8     encr_mode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_CONN_REQUEST		0x04
 struct hci_ev_conn_request {
 	bdaddr_t bdaddr;
 	__u8     dev_class[3];
 	__u8     link_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_DISCONN_COMPLETE		0x05
 struct hci_ev_disconn_complete {
 	__u8     status;
 	__le16   handle;
 	__u8     reason;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_AUTH_COMPLETE		0x06
 struct hci_ev_auth_complete {
 	__u8     status;
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_REMOTE_NAME		0x07
 struct hci_ev_remote_name {
 	__u8     status;
 	bdaddr_t bdaddr;
 	__u8     name[248];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_ENCRYPT_CHANGE		0x08
 struct hci_ev_encrypt_change {
 	__u8     status;
 	__le16   handle;
 	__u8     encrypt;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_CHANGE_LINK_KEY_COMPLETE	0x09
 struct hci_ev_change_link_key_complete {
 	__u8     status;
 	__le16   handle;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_REMOTE_FEATURES		0x0b
 struct hci_ev_remote_features {
 	__u8     status;
 	__le16   handle;
 	__u8     features[8];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_REMOTE_VERSION		0x0c
 struct hci_ev_remote_version {
@@ -669,7 +669,7 @@ struct hci_ev_remote_version {
 	__u8     lmp_ver;
 	__le16   manufacturer;
 	__le16   lmp_subver;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_QOS_SETUP_COMPLETE	0x0d
 struct hci_qos {
@@ -678,38 +678,38 @@ struct hci_qos {
 	__u32    peak_bandwidth;
 	__u32    latency;
 	__u32    delay_variation;
-} __attribute__ ((packed));
+} __packed;
 struct hci_ev_qos_setup_complete {
 	__u8     status;
 	__le16   handle;
 	struct   hci_qos qos;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_CMD_COMPLETE		0x0e
 struct hci_ev_cmd_complete {
 	__u8     ncmd;
 	__le16   opcode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_CMD_STATUS		0x0f
 struct hci_ev_cmd_status {
 	__u8     status;
 	__u8     ncmd;
 	__le16   opcode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_ROLE_CHANGE		0x12
 struct hci_ev_role_change {
 	__u8     status;
 	bdaddr_t bdaddr;
 	__u8     role;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_NUM_COMP_PKTS		0x13
 struct hci_ev_num_comp_pkts {
 	__u8     num_hndl;
 	/* variable length part */
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_MODE_CHANGE		0x14
 struct hci_ev_mode_change {
@@ -717,44 +717,44 @@ struct hci_ev_mode_change {
 	__le16   handle;
 	__u8     mode;
 	__le16   interval;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_PIN_CODE_REQ		0x16
 struct hci_ev_pin_code_req {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_LINK_KEY_REQ		0x17
 struct hci_ev_link_key_req {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_LINK_KEY_NOTIFY		0x18
 struct hci_ev_link_key_notify {
 	bdaddr_t bdaddr;
 	__u8     link_key[16];
 	__u8     key_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_CLOCK_OFFSET		0x1c
 struct hci_ev_clock_offset {
 	__u8     status;
 	__le16   handle;
 	__le16   clock_offset;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_PKT_TYPE_CHANGE		0x1d
 struct hci_ev_pkt_type_change {
 	__u8     status;
 	__le16   handle;
 	__le16   pkt_type;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_PSCAN_REP_MODE		0x20
 struct hci_ev_pscan_rep_mode {
 	bdaddr_t bdaddr;
 	__u8     pscan_rep_mode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_INQUIRY_RESULT_WITH_RSSI	0x22
 struct inquiry_info_with_rssi {
@@ -764,7 +764,7 @@ struct inquiry_info_with_rssi {
 	__u8     dev_class[3];
 	__le16   clock_offset;
 	__s8     rssi;
-} __attribute__ ((packed));
+} __packed;
 struct inquiry_info_with_rssi_and_pscan_mode {
 	bdaddr_t bdaddr;
 	__u8     pscan_rep_mode;
@@ -773,7 +773,7 @@ struct inquiry_info_with_rssi_and_pscan_mode {
 	__u8     dev_class[3];
 	__le16   clock_offset;
 	__s8     rssi;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_REMOTE_EXT_FEATURES	0x23
 struct hci_ev_remote_ext_features {
@@ -782,7 +782,7 @@ struct hci_ev_remote_ext_features {
 	__u8     page;
 	__u8     max_page;
 	__u8     features[8];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_SYNC_CONN_COMPLETE	0x2c
 struct hci_ev_sync_conn_complete {
@@ -795,7 +795,7 @@ struct hci_ev_sync_conn_complete {
 	__le16   rx_pkt_len;
 	__le16   tx_pkt_len;
 	__u8     air_mode;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_SYNC_CONN_CHANGED	0x2d
 struct hci_ev_sync_conn_changed {
@@ -805,7 +805,7 @@ struct hci_ev_sync_conn_changed {
 	__u8     retrans_window;
 	__le16   rx_pkt_len;
 	__le16   tx_pkt_len;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_SNIFF_SUBRATE		0x2e
 struct hci_ev_sniff_subrate {
@@ -815,7 +815,7 @@ struct hci_ev_sniff_subrate {
 	__le16   max_rx_latency;
 	__le16   max_remote_timeout;
 	__le16   max_local_timeout;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_EXTENDED_INQUIRY_RESULT	0x2f
 struct extended_inquiry_info {
@@ -826,37 +826,37 @@ struct extended_inquiry_info {
 	__le16   clock_offset;
 	__s8     rssi;
 	__u8     data[240];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_IO_CAPA_REQUEST		0x31
 struct hci_ev_io_capa_request {
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_SIMPLE_PAIR_COMPLETE	0x36
 struct hci_ev_simple_pair_complete {
 	__u8     status;
 	bdaddr_t bdaddr;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_REMOTE_HOST_FEATURES	0x3d
 struct hci_ev_remote_host_features {
 	bdaddr_t bdaddr;
 	__u8     features[8];
-} __attribute__ ((packed));
+} __packed;
 
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
 	__u16    type;
 	__u8     data[0];
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_SI_DEVICE	0x01
 struct hci_ev_si_device {
 	__u16    event;
 	__u16    dev_id;
-} __attribute__ ((packed));
+} __packed;
 
 #define HCI_EV_SI_SECURITY	0x02
 struct hci_ev_si_security {
@@ -864,7 +864,7 @@ struct hci_ev_si_security {
 	__u16    proto;
 	__u16    subproto;
 	__u8     incoming;
-} __attribute__ ((packed));
+} __packed;
 
 /* ---- HCI Packet structures ---- */
 #define HCI_COMMAND_HDR_SIZE 3
@@ -875,22 +875,22 @@ struct hci_ev_si_security {
 struct hci_command_hdr {
 	__le16	opcode;		/* OCF & OGF */
 	__u8 	plen;
-} __attribute__ ((packed));
+} __packed;
 
 struct hci_event_hdr {
 	__u8	evt;
 	__u8	plen;
-} __attribute__ ((packed));
+} __packed;
 
 struct hci_acl_hdr {
 	__le16	handle;		/* Handle & Flags(PB, BC) */
 	__le16	dlen;
-} __attribute__ ((packed));
+} __packed;
 
 struct hci_sco_hdr {
 	__le16	handle;
 	__u8	dlen;
-} __attribute__ ((packed));
+} __packed;
 
 #ifdef __KERNEL__
 #include <linux/skbuff.h>
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 4b86c783976c..636724b203ee 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -131,31 +131,31 @@ struct l2cap_conninfo {
 struct l2cap_hdr {
 	__le16     len;
 	__le16     cid;
-} __attribute__ ((packed));
+} __packed;
 #define L2CAP_HDR_SIZE		4
 
 struct l2cap_cmd_hdr {
 	__u8       code;
 	__u8       ident;
 	__le16     len;
-} __attribute__ ((packed));
+} __packed;
 #define L2CAP_CMD_HDR_SIZE	4
 
 struct l2cap_cmd_rej {
 	__le16     reason;
-} __attribute__ ((packed));
+} __packed;
 
 struct l2cap_conn_req {
 	__le16     psm;
 	__le16     scid;
-} __attribute__ ((packed));
+} __packed;
 
 struct l2cap_conn_rsp {
 	__le16     dcid;
 	__le16     scid;
 	__le16     result;
 	__le16     status;
-} __attribute__ ((packed));
+} __packed;
 
 /* channel indentifier */
 #define L2CAP_CID_SIGNALING	0x0001
@@ -179,14 +179,14 @@ struct l2cap_conf_req {
 	__le16     dcid;
 	__le16     flags;
 	__u8       data[0];
-} __attribute__ ((packed));
+} __packed;
 
 struct l2cap_conf_rsp {
 	__le16     scid;
 	__le16     flags;
 	__le16     result;
 	__u8       data[0];
-} __attribute__ ((packed));
+} __packed;
 
 #define L2CAP_CONF_SUCCESS	0x0000
 #define L2CAP_CONF_UNACCEPT	0x0001
@@ -197,7 +197,7 @@ struct l2cap_conf_opt {
 	__u8       type;
 	__u8       len;
 	__u8       val[0];
-} __attribute__ ((packed));
+} __packed;
 #define L2CAP_CONF_OPT_SIZE	2
 
 #define L2CAP_CONF_HINT		0x80
@@ -218,7 +218,7 @@ struct l2cap_conf_rfc {
 	__le16     retrans_timeout;
 	__le16     monitor_timeout;
 	__le16     max_pdu_size;
-} __attribute__ ((packed));
+} __packed;
 
 #define L2CAP_MODE_BASIC	0x00
 #define L2CAP_MODE_RETRANS	0x01
@@ -229,22 +229,22 @@ struct l2cap_conf_rfc {
 struct l2cap_disconn_req {
 	__le16     dcid;
 	__le16     scid;
-} __attribute__ ((packed));
+} __packed;
 
 struct l2cap_disconn_rsp {
 	__le16     dcid;
 	__le16     scid;
-} __attribute__ ((packed));
+} __packed;
 
 struct l2cap_info_req {
 	__le16      type;
-} __attribute__ ((packed));
+} __packed;
 
 struct l2cap_info_rsp {
 	__le16      type;
 	__le16      result;
 	__u8        data[0];
-} __attribute__ ((packed));
+} __packed;
 
 /* info type */
 #define L2CAP_IT_CL_MTU     0x0001
diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 921d7b3c7f8d..a140847d622c 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -106,19 +106,19 @@ struct rfcomm_hdr {
 	u8 addr;
 	u8 ctrl;
 	u8 len;    // Actual size can be 2 bytes
-} __attribute__ ((packed));
+} __packed;
 
 struct rfcomm_cmd {
 	u8 addr;
 	u8 ctrl;
 	u8 len;
 	u8 fcs;
-} __attribute__ ((packed));
+} __packed;
 
 struct rfcomm_mcc {
 	u8 type;
 	u8 len;
-} __attribute__ ((packed));
+} __packed;
 
 struct rfcomm_pn {
 	u8  dlci;
@@ -128,7 +128,7 @@ struct rfcomm_pn {
 	__le16 mtu;
 	u8  max_retrans;
 	u8  credits;
-} __attribute__ ((packed));
+} __packed;
 
 struct rfcomm_rpn {
 	u8  dlci;
@@ -138,17 +138,17 @@ struct rfcomm_rpn {
 	u8  xon_char;
 	u8  xoff_char;
 	__le16 param_mask;
-} __attribute__ ((packed));
+} __packed;
 
 struct rfcomm_rls {
 	u8  dlci;
 	u8  status;
-} __attribute__ ((packed));
+} __packed;
 
 struct rfcomm_msc {
 	u8  dlci;
 	u8  v24_sig;
-} __attribute__ ((packed));
+} __packed;
 
 /* ---- Core structures, flags etc ---- */
 
-- 
cgit v1.2.3-55-g7522